1 /* $NetBSD: fenv.c,v 1.6 2013/11/11 00:31:51 joerg Exp $ */ 2 3 /*- 4 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __RCSID("$NetBSD: fenv.c,v 1.6 2013/11/11 00:31:51 joerg Exp $"); 31 32 #include <sys/param.h> 33 #include <sys/sysctl.h> 34 #include <assert.h> 35 #include <fenv.h> 36 #include <stddef.h> 37 #include <string.h> 38 39 /* Load x87 Control Word */ 40 #define __fldcw(__cw) __asm__ __volatile__ \ 41 ("fldcw %0" : : "m" (__cw)) 42 43 /* No-Wait Store Control Word */ 44 #define __fnstcw(__cw) __asm__ __volatile__ \ 45 ("fnstcw %0" : "=m" (*(__cw))) 46 47 /* No-Wait Store Status Word */ 48 #define __fnstsw(__sw) __asm__ __volatile__ \ 49 ("fnstsw %0" : "=am" (*(__sw))) 50 51 /* No-Wait Clear Exception Flags */ 52 #define __fnclex() __asm__ __volatile__ \ 53 ("fnclex") 54 55 /* Load x87 Environment */ 56 #define __fldenv(__env) __asm__ __volatile__ \ 57 ("fldenv %0" : : "m" (__env)) 58 59 /* No-Wait Store x87 environment */ 60 #define __fnstenv(__env) __asm__ __volatile__ \ 61 ("fnstenv %0" : "=m" (*(__env))) 62 63 /* Check for and handle pending unmasked x87 pending FPU exceptions */ 64 #define __fwait(__env) __asm__ __volatile__ \ 65 ("fwait") 66 67 /* Load the MXCSR register */ 68 #define __ldmxcsr(__mxcsr) __asm__ __volatile__ \ 69 ("ldmxcsr %0" : : "m" (__mxcsr)) 70 71 /* Store the MXCSR register state */ 72 #define __stmxcsr(__mxcsr) __asm__ __volatile__ \ 73 ("stmxcsr %0" : "=m" (*(__mxcsr))) 74 75 /* 76 * The following constant represents the default floating-point environment 77 * (that is, the one installed at program startup) and has type pointer to 78 * const-qualified fenv_t. 79 * 80 * It can be used as an argument to the functions within the <fenv.h> header 81 * that manage the floating-point environment, namely fesetenv() and 82 * feupdateenv(). 83 * 84 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as 85 * RESERVED. We provide a partial floating-point environment, where we 86 * define only the lower bits. The reserved bits are extracted and set by the 87 * consumers of FE_DFL_ENV, during runtime. 88 */ 89 fenv_t __fe_dfl_env = { 90 { 91 __NetBSD_NPXCW__, /* Control word register */ 92 0x0, /* Unused */ 93 0x0000, /* Status word register */ 94 0x0, /* Unused */ 95 0x0000ffff, /* Tag word register */ 96 0x0, /* Unused */ 97 { 98 0x0000, 0x0000, 99 0x0000, 0xffff 100 } 101 }, 102 __INITIAL_MXCSR__ /* MXCSR register */ 103 }; 104 105 /* 106 * Test for SSE support on this processor. 107 * 108 * We need to use ldmxcsr/stmxcsr to get correct results if any part 109 * of the program was compiled to use SSE floating-point, but we can't 110 * use SSE on older processors. 111 * 112 * In order to do so, we need to query the processor capabilities via the CPUID 113 * instruction. We can make it even simpler though, by querying the machdep.sse 114 * sysctl. 115 */ 116 static int __HAS_SSE = 0; 117 118 static void __init_libm(void) __attribute__ ((constructor, used)); 119 120 static void __init_libm(void) 121 { 122 #if !defined(__minix) 123 size_t oldlen = sizeof(__HAS_SSE); 124 int rv; 125 uint16_t control; 126 127 rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0); 128 if (rv == -1) 129 __HAS_SSE = 0; 130 #else 131 uint16_t control; 132 __HAS_SSE = 0; 133 #endif /* !defined(__minix) */ 134 135 __fnstcw(&control); 136 __fe_dfl_env.x87.control = control; 137 } 138 139 /* 140 * The feclearexcept() function clears the supported floating-point exceptions 141 * represented by `excepts'. 142 */ 143 int 144 feclearexcept(int excepts) 145 { 146 fenv_t env; 147 uint32_t mxcsr; 148 int ex; 149 150 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 151 152 ex = excepts & FE_ALL_EXCEPT; 153 154 /* It's ~3x faster to call fnclex, than store/load fp env */ 155 if (ex == FE_ALL_EXCEPT) { 156 __fnclex(); 157 } else { 158 __fnstenv(&env); 159 env.x87.status &= ~ex; 160 __fldenv(env); 161 } 162 163 if (__HAS_SSE) { 164 __stmxcsr(&mxcsr); 165 mxcsr &= ~ex; 166 __ldmxcsr(mxcsr); 167 } 168 169 /* Success */ 170 return (0); 171 } 172 173 /* 174 * The fegetexceptflag() function stores an implementation-defined 175 * representation of the states of the floating-point status flags indicated by 176 * the argument excepts in the object pointed to by the argument flagp. 177 */ 178 int 179 fegetexceptflag(fexcept_t *flagp, int excepts) 180 { 181 uint32_t mxcsr; 182 uint16_t status; 183 int ex; 184 185 _DIAGASSERT(flagp != NULL); 186 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 187 188 ex = excepts & FE_ALL_EXCEPT; 189 190 __fnstsw(&status); 191 if (__HAS_SSE) 192 __stmxcsr(&mxcsr); 193 else 194 mxcsr = 0; 195 196 *flagp = (mxcsr | status) & ex; 197 198 /* Success */ 199 return (0); 200 } 201 202 /* 203 * The feraiseexcept() function raises the supported floating-point exceptions 204 * represented by the argument `excepts'. 205 * 206 * The standard explicitly allows us to execute an instruction that has the 207 * exception as a side effect, but we choose to manipulate the status register 208 * directly. 209 * 210 * The validation of input is being deferred to fesetexceptflag(). 211 */ 212 int 213 feraiseexcept(int excepts) 214 { 215 fexcept_t ex; 216 217 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 218 219 ex = excepts & FE_ALL_EXCEPT; 220 fesetexceptflag(&ex, excepts); 221 __fwait(); 222 223 /* Success */ 224 return (0); 225 } 226 227 /* 228 * This function sets the floating-point status flags indicated by the argument 229 * `excepts' to the states stored in the object pointed to by `flagp'. It does 230 * NOT raise any floating-point exceptions, but only sets the state of the flags. 231 */ 232 int 233 fesetexceptflag(const fexcept_t *flagp, int excepts) 234 { 235 fenv_t env; 236 uint32_t mxcsr; 237 int ex; 238 239 _DIAGASSERT(flagp != NULL); 240 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 241 242 ex = excepts & FE_ALL_EXCEPT; 243 244 __fnstenv(&env); 245 env.x87.status &= ~ex; 246 env.x87.status |= *flagp & ex; 247 __fldenv(env); 248 249 if (__HAS_SSE) { 250 __stmxcsr(&mxcsr); 251 mxcsr &= ~ex; 252 mxcsr |= *flagp & ex; 253 __ldmxcsr(mxcsr); 254 } 255 256 /* Success */ 257 return (0); 258 } 259 260 /* 261 * The fetestexcept() function determines which of a specified subset of the 262 * floating-point exception flags are currently set. The `excepts' argument 263 * specifies the floating-point status flags to be queried. 264 */ 265 int 266 fetestexcept(int excepts) 267 { 268 uint32_t mxcsr; 269 uint16_t status; 270 int ex; 271 272 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 273 274 ex = excepts & FE_ALL_EXCEPT; 275 276 __fnstsw(&status); 277 if (__HAS_SSE) 278 __stmxcsr(&mxcsr); 279 else 280 mxcsr = 0; 281 282 return ((status | mxcsr) & ex); 283 } 284 285 int 286 fegetround(void) 287 { 288 uint16_t control; 289 290 /* 291 * We assume that the x87 and the SSE unit agree on the 292 * rounding mode. Reading the control word on the x87 turns 293 * out to be about 5 times faster than reading it on the SSE 294 * unit on an Opteron 244. 295 */ 296 __fnstcw(&control); 297 298 return (control & __X87_ROUND_MASK); 299 } 300 301 /* 302 * The fesetround() function shall establish the rounding direction represented 303 * by its argument round. If the argument is not equal to the value of a 304 * rounding direction macro, the rounding direction is not changed. 305 */ 306 int 307 fesetround(int round) 308 { 309 uint32_t mxcsr; 310 uint16_t control; 311 312 if (round & ~__X87_ROUND_MASK) { 313 /* Failure */ 314 return (-1); 315 } 316 317 __fnstcw(&control); 318 control &= ~__X87_ROUND_MASK; 319 control |= round; 320 __fldcw(control); 321 322 if (__HAS_SSE) { 323 __stmxcsr(&mxcsr); 324 mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT); 325 mxcsr |= round << __SSE_ROUND_SHIFT; 326 __ldmxcsr(mxcsr); 327 } 328 329 /* Success */ 330 return (0); 331 } 332 333 /* 334 * The fegetenv() function attempts to store the current floating-point 335 * environment in the object pointed to by envp. 336 */ 337 int 338 fegetenv(fenv_t *envp) 339 { 340 uint32_t mxcsr; 341 342 _DIAGASSERT(flagp != NULL); 343 344 /* 345 * fnstenv masks all exceptions, so we need to restore the old control 346 * word to avoid this side effect. 347 */ 348 __fnstenv(envp); 349 __fldcw(envp->x87.control); 350 if (__HAS_SSE) { 351 __stmxcsr(&mxcsr); 352 envp->mxcsr = mxcsr; 353 } 354 355 /* Success */ 356 return (0); 357 } 358 359 /* 360 * The feholdexcept() function saves the current floating-point environment in 361 * the object pointed to by envp, clears the floating-point status flags, and 362 * then installs a non-stop (continue on floating-point exceptions) mode, if 363 * available, for all floating-point exceptions. 364 */ 365 int 366 feholdexcept(fenv_t *envp) 367 { 368 uint32_t mxcsr; 369 370 _DIAGASSERT(envp != NULL); 371 372 __fnstenv(envp); 373 __fnclex(); 374 if (__HAS_SSE) { 375 __stmxcsr(&mxcsr); 376 envp->mxcsr = mxcsr; 377 mxcsr &= ~FE_ALL_EXCEPT; 378 mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT; 379 __ldmxcsr(mxcsr); 380 } 381 382 /* Success */ 383 return (0); 384 } 385 386 /* 387 * The fesetenv() function attempts to establish the floating-point environment 388 * represented by the object pointed to by envp. The argument `envp' points 389 * to an object set by a call to fegetenv() or feholdexcept(), or equal a 390 * floating-point environment macro. The fesetenv() function does not raise 391 * floating-point exceptions, but only installs the state of the floating-point 392 * status flags represented through its argument. 393 */ 394 int 395 fesetenv(const fenv_t *envp) 396 { 397 fenv_t env; 398 399 _DIAGASSERT(envp != NULL); 400 401 /* Store the x87 floating-point environment */ 402 memset(&env, 0, sizeof(env)); 403 __fnstenv(&env); 404 405 __fe_dfl_env.x87.unused1 = env.x87.unused1; 406 __fe_dfl_env.x87.unused2 = env.x87.unused2; 407 __fe_dfl_env.x87.unused3 = env.x87.unused3; 408 memcpy(__fe_dfl_env.x87.others, 409 env.x87.others, 410 sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t)); 411 412 __fldenv(envp->x87); 413 if (__HAS_SSE) 414 __ldmxcsr(envp->mxcsr); 415 416 /* Success */ 417 return (0); 418 } 419 420 /* 421 * The feupdateenv() function saves the currently raised floating-point 422 * exceptions in its automatic storage, installs the floating-point environment 423 * represented by the object pointed to by `envp', and then raises the saved 424 * floating-point exceptions. The argument `envp' shall point to an object set 425 * by a call to feholdexcept() or fegetenv(), or equal a floating-point 426 * environment macro. 427 */ 428 int 429 feupdateenv(const fenv_t *envp) 430 { 431 fenv_t env; 432 uint32_t mxcsr; 433 uint16_t status; 434 435 _DIAGASSERT(envp != NULL); 436 437 /* Store the x87 floating-point environment */ 438 memset(&env, 0, sizeof(env)); 439 __fnstenv(&env); 440 441 __fe_dfl_env.x87.unused1 = env.x87.unused1; 442 __fe_dfl_env.x87.unused2 = env.x87.unused2; 443 __fe_dfl_env.x87.unused3 = env.x87.unused3; 444 memcpy(__fe_dfl_env.x87.others, 445 env.x87.others, 446 sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t)); 447 448 __fnstsw(&status); 449 if (__HAS_SSE) 450 __stmxcsr(&mxcsr); 451 else 452 mxcsr = 0; 453 fesetenv(envp); 454 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); 455 456 /* Success */ 457 return (0); 458 } 459 460 /* 461 * The following functions are extentions to the standard 462 */ 463 int 464 feenableexcept(int mask) 465 { 466 uint32_t mxcsr, omask; 467 uint16_t control; 468 469 mask &= FE_ALL_EXCEPT; 470 __fnstcw(&control); 471 if (__HAS_SSE) 472 __stmxcsr(&mxcsr); 473 else 474 mxcsr = 0; 475 476 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 477 control &= ~mask; 478 __fldcw(control); 479 if (__HAS_SSE) { 480 mxcsr &= ~(mask << __SSE_EMASK_SHIFT); 481 __ldmxcsr(mxcsr); 482 } 483 484 return (FE_ALL_EXCEPT & ~omask); 485 } 486 487 int 488 fedisableexcept(int mask) 489 { 490 uint32_t mxcsr, omask; 491 uint16_t control; 492 493 mask &= FE_ALL_EXCEPT; 494 __fnstcw(&control); 495 if (__HAS_SSE) 496 __stmxcsr(&mxcsr); 497 else 498 mxcsr = 0; 499 500 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 501 control |= mask; 502 __fldcw(control); 503 if (__HAS_SSE) { 504 mxcsr |= mask << __SSE_EMASK_SHIFT; 505 __ldmxcsr(mxcsr); 506 } 507 508 return (FE_ALL_EXCEPT & ~omask); 509 } 510 511 int 512 fegetexcept(void) 513 { 514 uint16_t control; 515 516 /* 517 * We assume that the masks for the x87 and the SSE unit are 518 * the same. 519 */ 520 __fnstcw(&control); 521 522 return (~control & FE_ALL_EXCEPT); 523 } 524