
/*
===============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/*
-------------------------------------------------------------------------------
Underflow tininess-detection mode, statically initialized to default value.
(The declaration in `softfloat.h' must match the `int8' type here.)
36------------------------------------------------------------------------------- 37*/ 38int8 float_detect_tininess = float_tininess_after_rounding; 39 40/* 41------------------------------------------------------------------------------- 42Raises the exceptions specified by `flags'. Floating-point traps can be 43defined here if desired. It is currently not possible for such a trap to 44substitute a result value. If traps are not implemented, this routine 45should be simply `float_exception_flags |= flags;'. 46------------------------------------------------------------------------------- 47*/ 48void float_raise( int8 flags ) 49{ 50 51 float_exception_flags |= flags; 52 53} 54 55/* 56------------------------------------------------------------------------------- 57Internal canonical NaN format. 58------------------------------------------------------------------------------- 59*/ 60typedef struct { 61 flag sign; 62 bits64 high, low; 63} commonNaNT; 64 65/* 66------------------------------------------------------------------------------- 67The pattern for a default generated single-precision NaN. 68------------------------------------------------------------------------------- 69*/ 70#define float32_default_nan 0xFFFFFFFF 71 72/* 73------------------------------------------------------------------------------- 74Returns 1 if the single-precision floating-point value `a' is a NaN; 75otherwise returns 0. 76------------------------------------------------------------------------------- 77*/ 78flag float32_is_nan( float32 a ) 79{ 80 81 return ( 0xFF000000 < (bits32) ( a<<1 ) ); 82 83} 84 85/* 86------------------------------------------------------------------------------- 87Returns 1 if the single-precision floating-point value `a' is a signaling 88NaN; otherwise returns 0. 
89------------------------------------------------------------------------------- 90*/ 91flag float32_is_signaling_nan( float32 a ) 92{ 93 94 return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); 95 96} 97 98/* 99------------------------------------------------------------------------------- 100Returns the result of converting the single-precision floating-point NaN 101`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid 102exception is raised. 103------------------------------------------------------------------------------- 104*/ 105static commonNaNT float32ToCommonNaN( float32 a ) 106{ 107 commonNaNT z; 108 109 if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); 110 z.sign = a>>31; 111 z.low = 0; 112 z.high = ( (bits64) a )<<41; 113 return z; 114 115} 116 117/* 118------------------------------------------------------------------------------- 119Returns the result of converting the canonical NaN `a' to the single- 120precision floating-point format. 121------------------------------------------------------------------------------- 122*/ 123static float32 commonNaNToFloat32( commonNaNT a ) 124{ 125 126 return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); 127 128} 129 130/* 131------------------------------------------------------------------------------- 132Takes two single-precision floating-point values `a' and `b', one of which 133is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a 134signaling NaN, the invalid exception is raised. 
135------------------------------------------------------------------------------- 136*/ 137static float32 propagateFloat32NaN( float32 a, float32 b ) 138{ 139 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; 140 141 aIsNaN = float32_is_nan( a ); 142 aIsSignalingNaN = float32_is_signaling_nan( a ); 143 bIsNaN = float32_is_nan( b ); 144 bIsSignalingNaN = float32_is_signaling_nan( b ); 145 a |= 0x00400000; 146 b |= 0x00400000; 147 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); 148 if ( aIsNaN ) { 149 return ( aIsSignalingNaN & bIsNaN ) ? b : a; 150 } 151 else { 152 return b; 153 } 154 155} 156 157/* 158------------------------------------------------------------------------------- 159The pattern for a default generated double-precision NaN. 160------------------------------------------------------------------------------- 161*/ 162#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF ) 163 164/* 165------------------------------------------------------------------------------- 166Returns 1 if the double-precision floating-point value `a' is a NaN; 167otherwise returns 0. 168------------------------------------------------------------------------------- 169*/ 170flag float64_is_nan( float64 a ) 171{ 172 173 return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) ); 174 175} 176 177/* 178------------------------------------------------------------------------------- 179Returns 1 if the double-precision floating-point value `a' is a signaling 180NaN; otherwise returns 0. 181------------------------------------------------------------------------------- 182*/ 183flag float64_is_signaling_nan( float64 a ) 184{ 185 186 return 187 ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) 188 && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); 189 190} 191 192/* 193------------------------------------------------------------------------------- 194Returns the result of converting the double-precision floating-point NaN 195`a' to the canonical NaN format. 
If `a' is a signaling NaN, the invalid 196exception is raised. 197------------------------------------------------------------------------------- 198*/ 199static commonNaNT float64ToCommonNaN( float64 a ) 200{ 201 commonNaNT z; 202 203 if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); 204 z.sign = a>>63; 205 z.low = 0; 206 z.high = a<<12; 207 return z; 208 209} 210 211/* 212------------------------------------------------------------------------------- 213Returns the result of converting the canonical NaN `a' to the double- 214precision floating-point format. 215------------------------------------------------------------------------------- 216*/ 217static float64 commonNaNToFloat64( commonNaNT a ) 218{ 219 220 return 221 ( ( (bits64) a.sign )<<63 ) 222 | LIT64( 0x7FF8000000000000 ) 223 | ( a.high>>12 ); 224 225} 226 227/* 228------------------------------------------------------------------------------- 229Takes two double-precision floating-point values `a' and `b', one of which 230is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a 231signaling NaN, the invalid exception is raised. 232------------------------------------------------------------------------------- 233*/ 234static float64 propagateFloat64NaN( float64 a, float64 b ) 235{ 236 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; 237 238 aIsNaN = float64_is_nan( a ); 239 aIsSignalingNaN = float64_is_signaling_nan( a ); 240 bIsNaN = float64_is_nan( b ); 241 bIsSignalingNaN = float64_is_signaling_nan( b ); 242 a |= LIT64( 0x0008000000000000 ); 243 b |= LIT64( 0x0008000000000000 ); 244 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); 245 if ( aIsNaN ) { 246 return ( aIsSignalingNaN & bIsNaN ) ? b : a; 247 } 248 else { 249 return b; 250 } 251 252} 253 254#ifdef FLOATX80 255 256/* 257------------------------------------------------------------------------------- 258The pattern for a default generated extended double-precision NaN. 
The 259`high' and `low' values hold the most- and least-significant bits, 260respectively. 261------------------------------------------------------------------------------- 262*/ 263#define floatx80_default_nan_high 0xFFFF 264#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) 265 266/* 267------------------------------------------------------------------------------- 268Returns 1 if the extended double-precision floating-point value `a' is a 269NaN; otherwise returns 0. 270------------------------------------------------------------------------------- 271*/ 272flag floatx80_is_nan( floatx80 a ) 273{ 274 275 return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); 276 277} 278 279/* 280------------------------------------------------------------------------------- 281Returns 1 if the extended double-precision floating-point value `a' is a 282signaling NaN; otherwise returns 0. 283------------------------------------------------------------------------------- 284*/ 285flag floatx80_is_signaling_nan( floatx80 a ) 286{ 287 bits64 aLow; 288 289 aLow = a.low & ~ LIT64( 0x4000000000000000 ); 290 return 291 ( ( a.high & 0x7FFF ) == 0x7FFF ) 292 && (bits64) ( aLow<<1 ) 293 && ( a.low == aLow ); 294 295} 296 297/* 298------------------------------------------------------------------------------- 299Returns the result of converting the extended double-precision floating- 300point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the 301invalid exception is raised. 
302------------------------------------------------------------------------------- 303*/ 304static commonNaNT floatx80ToCommonNaN( floatx80 a ) 305{ 306 commonNaNT z; 307 308 if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); 309 z.sign = a.high>>15; 310 z.low = 0; 311 z.high = a.low<<1; 312 return z; 313 314} 315 316/* 317------------------------------------------------------------------------------- 318Returns the result of converting the canonical NaN `a' to the extended 319double-precision floating-point format. 320------------------------------------------------------------------------------- 321*/ 322static floatx80 commonNaNToFloatx80( commonNaNT a ) 323{ 324 floatx80 z; 325 326 z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); 327 z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; 328 return z; 329 330} 331 332/* 333------------------------------------------------------------------------------- 334Takes two extended double-precision floating-point values `a' and `b', one 335of which is a NaN, and returns the appropriate NaN result. If either `a' or 336`b' is a signaling NaN, the invalid exception is raised. 337------------------------------------------------------------------------------- 338*/ 339static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ) 340{ 341 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; 342 343 aIsNaN = floatx80_is_nan( a ); 344 aIsSignalingNaN = floatx80_is_signaling_nan( a ); 345 bIsNaN = floatx80_is_nan( b ); 346 bIsSignalingNaN = floatx80_is_signaling_nan( b ); 347 a.low |= LIT64( 0xC000000000000000 ); 348 b.low |= LIT64( 0xC000000000000000 ); 349 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); 350 if ( aIsNaN ) { 351 return ( aIsSignalingNaN & bIsNaN ) ? 
b : a; 352 } 353 else { 354 return b; 355 } 356 357} 358 359#endif 360 361#ifdef FLOAT128 362 363/* 364------------------------------------------------------------------------------- 365The pattern for a default generated quadruple-precision NaN. The `high' and 366`low' values hold the most- and least-significant bits, respectively. 367------------------------------------------------------------------------------- 368*/ 369#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF ) 370#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) 371 372/* 373------------------------------------------------------------------------------- 374Returns 1 if the quadruple-precision floating-point value `a' is a NaN; 375otherwise returns 0. 376------------------------------------------------------------------------------- 377*/ 378flag float128_is_nan( float128 a ) 379{ 380 381 return 382 ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) 383 && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); 384 385} 386 387/* 388------------------------------------------------------------------------------- 389Returns 1 if the quadruple-precision floating-point value `a' is a 390signaling NaN; otherwise returns 0. 391------------------------------------------------------------------------------- 392*/ 393flag float128_is_signaling_nan( float128 a ) 394{ 395 396 return 397 ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) 398 && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); 399 400} 401 402/* 403------------------------------------------------------------------------------- 404Returns the result of converting the quadruple-precision floating-point NaN 405`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid 406exception is raised. 
407------------------------------------------------------------------------------- 408*/ 409static commonNaNT float128ToCommonNaN( float128 a ) 410{ 411 commonNaNT z; 412 413 if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); 414 z.sign = a.high>>63; 415 shortShift128Left( a.high, a.low, 16, &z.high, &z.low ); 416 return z; 417 418} 419 420/* 421------------------------------------------------------------------------------- 422Returns the result of converting the canonical NaN `a' to the quadruple- 423precision floating-point format. 424------------------------------------------------------------------------------- 425*/ 426static float128 commonNaNToFloat128( commonNaNT a ) 427{ 428 float128 z; 429 430 shift128Right( a.high, a.low, 16, &z.high, &z.low ); 431 z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 ); 432 return z; 433 434} 435 436/* 437------------------------------------------------------------------------------- 438Takes two quadruple-precision floating-point values `a' and `b', one of 439which is a NaN, and returns the appropriate NaN result. If either `a' or 440`b' is a signaling NaN, the invalid exception is raised. 441------------------------------------------------------------------------------- 442*/ 443static float128 propagateFloat128NaN( float128 a, float128 b ) 444{ 445 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; 446 447 aIsNaN = float128_is_nan( a ); 448 aIsSignalingNaN = float128_is_signaling_nan( a ); 449 bIsNaN = float128_is_nan( b ); 450 bIsSignalingNaN = float128_is_signaling_nan( b ); 451 a.high |= LIT64( 0x0000800000000000 ); 452 b.high |= LIT64( 0x0000800000000000 ); 453 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); 454 if ( aIsNaN ) { 455 return ( aIsSignalingNaN & bIsNaN ) ? b : a; 456 } 457 else { 458 return b; 459 } 460 461} 462 463#endif 464 465