1 /* $NetBSD: systfloat.c,v 1.8 2008/04/28 20:23:04 martin Exp $ */ 2 3 /* This is a derivative work. */ 4 5 /*- 6 * Copyright (c) 2001 The NetBSD Foundation, Inc. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to The NetBSD Foundation 10 * by Ross Harvey. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 =============================================================================== 36 37 This C source file is part of TestFloat, Release 2a, a package of programs 38 for testing the correctness of floating-point arithmetic complying to the 39 IEC/IEEE Standard for Floating-Point. 40 41 Written by John R. Hauser. More information is available through the Web 42 page `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/TestFloat.html'. 43 44 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 45 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 46 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 47 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 48 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 49 50 Derivative works are acceptable, even for commercial purposes, so long as 51 (1) they include prominent notice that the work is derivative, and (2) they 52 include prominent notice akin to these four paragraphs for those parts of 53 this code that are retained. 54 55 =============================================================================== 56 */ 57 58 #include <sys/cdefs.h> 59 #ifndef __lint 60 __RCSID("$NetBSD: systfloat.c,v 1.8 2008/04/28 20:23:04 martin Exp $"); 61 #endif 62 63 #include <math.h> 64 #include <ieeefp.h> 65 #include "milieu.h" 66 #include "softfloat.h" 67 #include "systfloat.h" 68 #include "systflags.h" 69 #include "systmodes.h" 70 71 typedef union { 72 float32 f32; 73 float f; 74 } union32; 75 typedef union { 76 float64 f64; 77 double d; 78 } union64; 79 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 ) 80 typedef union { 81 floatx80 fx80; 82 long double ld; 83 } unionx80; 84 #endif 85 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 ) 86 typedef union { 87 float128 f128; 88 long double ld; 89 } union128; 90 #endif 91 92 fp_except 93 syst_float_flags_clear(void) 94 { 95 return fpsetsticky(0) 96 & (FP_X_IMP | FP_X_UFL | FP_X_OFL | FP_X_DZ | FP_X_INV); 97 } 98 99 void 100 syst_float_set_rounding_mode(fp_rnd direction) 101 { 102 fpsetround(direction); 103 fpsetmask(0); 104 } 105 106 float32 syst_int32_to_float32( int32 a ) 107 { 108 const union32 uz = { .f = a }; 109 110 return uz.f32; 111 112 } 113 114 float64 syst_int32_to_float64( int32 a ) 115 { 116 const union64 uz = { .d = a }; 117 118 return uz.f64; 119 120 } 121 122 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 ) 123 124 floatx80 syst_int32_to_floatx80( int32 a ) 125 { 126 const unionx80 uz = { .ld = a }; 127 128 return uz.fx80; 129 130 } 131 132 #endif 133 134 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 ) 135 136 float128 syst_int32_to_float128( int32 a ) 137 { 138 const union128 uz = { .ld = a }; 139 140 return uz.f128; 141 142 } 143 144 #endif 145 146 #ifdef BITS64 147 148 float32 syst_int64_to_float32( int64 a ) 149 { 150 const union32 uz = { .f = a }; 151 152 return uz.f32; 153 } 154 155 float64 syst_int64_to_float64( int64 a ) 156 { 157 const union64 uz = { .d = a }; 158 159 return uz.f64; 160 } 161 162 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 ) 163 164 floatx80 syst_int64_to_floatx80( int64 a ) 165 { 166 const unionx80 uz = { .ld = a }; 167 168 return uz.fx80; 169 } 170 171 #endif 172 173 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 ) 174 175 float128 syst_int64_to_float128( int64 a ) 176 { 177 const union128 uz = { .ld = a }; 178 179 return uz.f128; 180 } 181 182 #endif 183 184 #endif 185 186 int32 syst_float32_to_int32_round_to_zero( float32 a ) 187 { 188 const union32 uz = { .f32 = a }; 189 190 return uz.f; 191 192 } 193 194 #ifdef BITS64 195 196 int64 syst_float32_to_int64_round_to_zero( float32 a ) 197 { 198 const union32 uz = { .f32 = a }; 199 200 return uz.f; 201 } 202 203 #endif 204 205 float64 syst_float32_to_float64( float32 a ) 206 { 207 const union32 ua = { .f32 = a }; 208 union64 uz; 209 210 uz.d = ua.f; 211 return uz.f64; 212 213 } 214 215 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 ) 216 217 floatx80 syst_float32_to_floatx80( float32 a ) 218 { 219 const union32 ua = { .f32 = a }; 220 unionx80 uz; 221 222 uz.ld = ua.f; 223 return uz.fx80; 224 } 225 226 #endif 227 228 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 ) 229 230 float128 syst_float32_to_float128( float32 a ) 231 { 232 const union32 ua = { .f32 = a }; 233 union128 ub; 234 235 ub.ld = ua.f; 236 return ub.f128; 237 } 238 239 #endif 240 241 float32 syst_float32_add( float32 a, float32 b ) 242 { 243 const union32 ua = { .f32 = a }, ub = { .f32 = b }; 244 union32 uz; 245 246 uz.f = ua.f + ub.f; 247 return uz.f32; 248 } 249 250 float32 syst_float32_sub( float32 a, float32 b ) 251 { 252 const union32 ua = { .f32 = a }, ub = { .f32 = b }; 253 union32 uz; 254 255 uz.f = ua.f - ub.f; 256 return uz.f32; 257 } 258 259 float32 syst_float32_mul( float32 a, float32 b ) 260 { 261 const union32 ua = { .f32 = a }, ub = { .f32 = b }; 262 union32 uz; 263 264 uz.f = ua.f * ub.f; 265 return uz.f32; 266 } 267 268 float32 syst_float32_div( float32 a, float32 b ) 269 { 270 const union32 ua = { .f32 = a }, ub = { .f32 = b }; 271 union32 uz; 272 273 uz.f = ua.f / ub.f; 274 return uz.f32; 275 } 276 277 flag syst_float32_eq( float32 a, float32 b ) 278 { 279 const union32 ua = { .f32 = a }, ub = { .f32 = b }; 280 281 return ua.f == ub.f; 282 } 283 284 flag syst_float32_le( float32 a, float32 b ) 285 { 286 const union32 ua = { .f32 = a }, ub = { .f32 = b }; 287 288 return ua.f <= ub.f; 289 } 290 291 flag syst_float32_lt( float32 a, float32 b ) 292 { 293 const union32 ua = { .f32 = a }, ub = { .f32 = b }; 294 295 return ua.f < ub.f; 296 } 297 298 int32 syst_float64_to_int32_round_to_zero( float64 a ) 299 { 300 const union64 uz = { .f64 = a }; 301 302 return uz.d; 303 } 304 305 #ifdef BITS64 306 307 int64 syst_float64_to_int64_round_to_zero( float64 a ) 308 { 309 const union64 uz = { .f64 = a }; 310 311 return uz.d; 312 } 313 314 #endif 315 316 float32 syst_float64_to_float32( float64 a ) 317 { 318 const union64 ua = { .f64 = a }; 319 union32 uz; 320 321 uz.f = ua.d; 322 return uz.f32; 323 } 324 325 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 ) 326 327 floatx80 syst_float64_to_floatx80( float64 a ) 328 { 329 const union64 ua = { .f64 = a }; 330 unionx80 u; 331 332 u.ld = ua.d; 333 return u.fx80; 334 } 335 336 #endif 337 338 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 ) 339 340 float128 syst_float64_to_float128( float64 a ) 341 { 342 const union64 ua = { .f64 = a }; 343 union128 uz; 344 345 uz.ld = ua.d; 346 return uz.f128; 347 } 348 349 #endif 350 351 float64 syst_float64_add( float64 a, float64 b ) 352 { 353 const union64 ua = { .f64 = a }, ub = { .f64 = b }; 354 union64 uz; 355 356 uz.d = ua.d + ub.d; 357 return uz.f64; 358 } 359 360 float64 syst_float64_sub( float64 a, float64 b ) 361 { 362 const union64 ua = { .f64 = a }, ub = { .f64 = b }; 363 union64 uz; 364 365 uz.d = ua.d - ub.d; 366 return uz.f64; 367 } 368 369 float64 syst_float64_mul( float64 a, float64 b ) 370 { 371 const union64 ua = { .f64 = a }, ub = { .f64 = b }; 372 union64 uz; 373 374 uz.d = ua.d * ub.d; 375 return uz.f64; 376 } 377 378 float64 syst_float64_div( float64 a, float64 b ) 379 { 380 const union64 ua = { .f64 = a }, ub = { .f64 = b }; 381 union64 uz; 382 383 uz.d = ua.d / ub.d; 384 return uz.f64; 385 } 386 387 float64 syst_float64_sqrt( float64 a ) 388 { 389 const union64 ua = { .f64 = a }; 390 union64 uz; 391 392 uz.d = sqrt(ua.d); 393 return uz.f64; 394 } 395 396 flag syst_float64_eq( float64 a, float64 b ) 397 { 398 const union64 ua = { .f64 = a }, ub = { .f64 = b }; 399 400 return ua.d == ub.d; 401 } 402 403 flag syst_float64_le( float64 a, float64 b ) 404 { 405 const union64 ua = { .f64 = a }, ub = { .f64 = b }; 406 407 return ua.d <= ub.d; 408 } 409 410 flag syst_float64_lt( float64 a, float64 b ) 411 { 412 const union64 ua = { .f64 = a }, ub = { .f64 = b }; 413 414 return ua.d < ub.d; 415 } 416 417 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 ) 418 419 int32 syst_floatx80_to_int32_round_to_zero( floatx80 a ) 420 { 421 const unionx80 uz = { .fx80 = a }; 422 423 return uz.ld; 424 } 425 426 #ifdef BITS64 427 428 int64 syst_floatx80_to_int64_round_to_zero( floatx80 a ) 429 { 430 const unionx80 uz = { .fx80 = a }; 431 432 return uz.ld; 433 } 434 435 #endif 436 437 float32 syst_floatx80_to_float32( floatx80 a ) 438 { 439 const unionx80 ua = { .fx80 = a }; 440 union32 uz; 441 442 uz.f = ua.ld; 443 return uz.f32; 444 } 445 446 float64 syst_floatx80_to_float64( floatx80 a ) 447 { 448 const unionx80 ua = { .fx80 = a }; 449 union64 uz; 450 451 uz.d = ua.ld; 452 return uz.f64; 453 } 454 455 floatx80 syst_floatx80_add( floatx80 a, floatx80 b ) 456 { 457 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b }; 458 unionx80 uz; 459 460 uz.ld = ua.ld + ub.ld; 461 return uz.fx80; 462 } 463 464 floatx80 syst_floatx80_sub( floatx80 a, floatx80 b ) 465 { 466 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b }; 467 unionx80 uz; 468 469 uz.ld = ua.ld - ub.ld; 470 return uz.fx80; 471 } 472 473 floatx80 syst_floatx80_mul( floatx80 a, floatx80 b ) 474 { 475 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b }; 476 unionx80 uz; 477 478 uz.ld = ua.ld * ub.ld; 479 return uz.fx80; 480 } 481 482 floatx80 syst_floatx80_div( floatx80 a, floatx80 b ) 483 { 484 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b }; 485 unionx80 uz; 486 487 uz.ld = ua.ld / ub.ld; 488 return uz.fx80; 489 } 490 491 flag syst_floatx80_eq( floatx80 a, floatx80 b ) 492 { 493 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b }; 494 495 return ua.ld == ub.ld; 496 } 497 498 flag syst_floatx80_le( floatx80 a, floatx80 b ) 499 { 500 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b }; 501 502 return ua.ld <= ub.ld; 503 } 504 505 flag syst_floatx80_lt( floatx80 a, floatx80 b ) 506 { 507 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b }; 508 509 return ua.ld < ub.ld; 510 } 511 512 #endif 513 514 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 ) 515 516 int32 syst_float128_to_int32_round_to_zero( float128 a ) 517 { 518 const union128 ua = { .f128 = a }; 519 520 return ua.ld; 521 } 522 523 #ifdef BITS64 524 525 int64 syst_float128_to_int64_round_to_zero( float128 a ) 526 { 527 const union128 ua = { .f128 = a }; 528 529 return ua.ld; 530 } 531 532 #endif 533 534 float32 syst_float128_to_float32( float128 a ) 535 { 536 const union128 ua = { .f128 = a }; 537 union32 uz; 538 539 uz.f = ua.ld; 540 return uz.f32; 541 542 } 543 544 float64 syst_float128_to_float64( float128 a ) 545 { 546 const union128 ua = { .f128 = a }; 547 union64 uz; 548 549 uz.d = ua.ld; 550 return uz.f64; 551 } 552 553 float128 syst_float128_add( float128 a, float128 b ) 554 { 555 const union128 ua = { .f128 = a }, ub = { .f128 = b }; 556 union128 uz; 557 558 uz.ld = ua.ld + ub.ld; 559 return uz.f128; 560 561 } 562 563 float128 syst_float128_sub( float128 a, float128 b ) 564 { 565 const union128 ua = { .f128 = a }, ub = { .f128 = b }; 566 union128 uz; 567 568 uz.ld = ua.ld - ub.ld; 569 return uz.f128; 570 } 571 572 float128 syst_float128_mul( float128 a, float128 b ) 573 { 574 const union128 ua = { .f128 = a }, ub = { .f128 = b }; 575 union128 uz; 576 577 uz.ld = ua.ld * ub.ld; 578 return uz.f128; 579 } 580 581 float128 syst_float128_div( float128 a, float128 b ) 582 { 583 const union128 ua = { .f128 = a }, ub = { .f128 = b }; 584 union128 uz; 585 586 uz.ld = ua.ld / ub.ld; 587 return uz.f128; 588 } 589 590 flag syst_float128_eq( float128 a, float128 b ) 591 { 592 const union128 ua = { .f128 = a }, ub = { .f128 = b }; 593 594 return ua.ld == ub.ld; 595 } 596 597 flag syst_float128_le( float128 a, float128 b ) 598 { 599 const union128 ua = { .f128 = a }, ub = { .f128 = b }; 600 601 return ua.ld <= ub.ld; 602 } 603 604 flag syst_float128_lt( float128 a, float128 b ) 605 { 606 const union128 ua = { .f128 = a }, ub = { .f128 = b }; 607 608 return ua.ld < ub.ld; 609 } 610 611 #endif 612