xref: /minix/lib/libm/arch/i387/fenv.c (revision ebfedea0)
1 /* $NetBSD: fenv.c,v 1.6 2013/11/11 00:31:51 joerg Exp $ */
2 
3 /*-
4  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __RCSID("$NetBSD: fenv.c,v 1.6 2013/11/11 00:31:51 joerg Exp $");
31 
32 #include <sys/param.h>
33 #include <sys/sysctl.h>
34 #include <assert.h>
35 #include <fenv.h>
36 #include <stddef.h>
37 #include <string.h>
38 
/*
 * Thin wrappers around the x87 and SSE control/status instructions.
 *
 * The "store" wrappers (__fnstcw, __fnstsw, __fnstenv, __stmxcsr) take a
 * pointer to the destination object; the "load" wrappers (__fldcw,
 * __fldenv, __ldmxcsr) take the source object itself, which must be an
 * lvalue because it is passed to the assembler as a memory operand.
 */

/* Load x87 Control Word */
#define	__fldcw(__cw)		__asm__ __volatile__	\
	("fldcw %0" : : "m" (__cw))

/* No-Wait Store Control Word */
#define	__fnstcw(__cw)		__asm__ __volatile__	\
	("fnstcw %0" : "=m" (*(__cw)))

/* No-Wait Store Status Word */
#define	__fnstsw(__sw)		__asm__ __volatile__	\
	("fnstsw %0" : "=am" (*(__sw)))

/* No-Wait Clear Exception Flags */
#define	__fnclex()		__asm__ __volatile__	\
	("fnclex")

/* Load x87 Environment */
#define	__fldenv(__env)		__asm__ __volatile__	\
	("fldenv %0" : : "m" (__env))

/* No-Wait Store x87 environment */
#define	__fnstenv(__env)	__asm__ __volatile__	\
	("fnstenv %0" : "=m" (*(__env)))

/*
 * Check for and handle pending unmasked x87 FPU exceptions.
 * Takes no operand: the instruction works on the FPU state only.
 */
#define	__fwait()		__asm__ __volatile__	\
	("fwait")

/* Load the MXCSR register */
#define	__ldmxcsr(__mxcsr)	__asm__ __volatile__	\
	("ldmxcsr %0" : : "m" (__mxcsr))

/* Store the MXCSR register state */
#define	__stmxcsr(__mxcsr)	__asm__ __volatile__	\
	("stmxcsr %0" : "=m" (*(__mxcsr)))
74 
75 /*
76  * The following constant represents the default floating-point environment
77  * (that is, the one installed at program startup) and has type pointer to
78  * const-qualified fenv_t.
79  *
80  * It can be used as an argument to the functions within the <fenv.h> header
81  * that manage the floating-point environment, namely fesetenv() and
82  * feupdateenv().
83  *
84  * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
85  * RESERVED. We provide a partial floating-point environment, where we
86  * define only the lower bits. The reserved bits are extracted and set by the
87  * consumers of FE_DFL_ENV, during runtime.
88  */
89 fenv_t __fe_dfl_env = {
90 	{
91 		__NetBSD_NPXCW__,       /* Control word register */
92 		0x0,			/* Unused */
93 		0x0000,                 /* Status word register */
94 		0x0,			/* Unused */
95 		0x0000ffff,             /* Tag word register */
96 		0x0,			/* Unused */
97 		{
98 			0x0000, 0x0000,
99 			0x0000, 0xffff
100 		}
101 	},
102 	__INITIAL_MXCSR__		/* MXCSR register */
103 };
104 
105 /*
106  * Test for SSE support on this processor.
107  *
108  * We need to use ldmxcsr/stmxcsr to get correct results if any part
109  * of the program was compiled to use SSE floating-point, but we can't
110  * use SSE on older processors.
111  *
112  * In order to do so, we need to query the processor capabilities via the CPUID
113  * instruction. We can make it even simpler though, by querying the machdep.sse
114  * sysctl.
115  */
116 static int __HAS_SSE = 0;
117 
118 static void __init_libm(void) __attribute__ ((constructor, used));
119 
120 static void __init_libm(void)
121 {
122 #if !defined(__minix)
123 	size_t oldlen = sizeof(__HAS_SSE);
124 	int rv;
125 	uint16_t control;
126 
127 	rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
128 	if (rv == -1)
129 		__HAS_SSE = 0;
130 #else
131 	uint16_t control;
132 	__HAS_SSE = 0;
133 #endif /* !defined(__minix) */
134 
135 	__fnstcw(&control);
136 	__fe_dfl_env.x87.control = control;
137 }
138 
139 /*
140  * The feclearexcept() function clears the supported floating-point exceptions
141  * represented by `excepts'.
142  */
143 int
144 feclearexcept(int excepts)
145 {
146 	fenv_t env;
147 	uint32_t mxcsr;
148 	int ex;
149 
150 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
151 
152 	ex = excepts & FE_ALL_EXCEPT;
153 
154 	/* It's ~3x faster to call fnclex, than store/load fp env */
155 	if (ex == FE_ALL_EXCEPT) {
156 		__fnclex();
157 	} else {
158 		__fnstenv(&env);
159 		env.x87.status &= ~ex;
160 		__fldenv(env);
161 	}
162 
163 	if (__HAS_SSE) {
164 		__stmxcsr(&mxcsr);
165 		mxcsr &= ~ex;
166 		__ldmxcsr(mxcsr);
167 	}
168 
169 	/* Success */
170 	return (0);
171 }
172 
173 /*
174  * The fegetexceptflag() function stores an implementation-defined
175  * representation of the states of the floating-point status flags indicated by
176  * the argument excepts in the object pointed to by the argument flagp.
177  */
178 int
179 fegetexceptflag(fexcept_t *flagp, int excepts)
180 {
181 	uint32_t mxcsr;
182 	uint16_t status;
183 	int ex;
184 
185 	_DIAGASSERT(flagp != NULL);
186 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
187 
188 	ex = excepts & FE_ALL_EXCEPT;
189 
190 	__fnstsw(&status);
191 	if (__HAS_SSE)
192 		__stmxcsr(&mxcsr);
193 	else
194 		mxcsr = 0;
195 
196 	*flagp = (mxcsr | status) & ex;
197 
198 	/* Success */
199 	return (0);
200 }
201 
/*
 * feraiseexcept(): raise the supported floating-point exceptions named by
 * `excepts'.
 *
 * The standard allows executing an instruction that produces the exception
 * as a side effect; instead the flags are written directly through
 * fesetexceptflag() (which also performs the input validation), and an
 * fwait is then issued so pending unmasked x87 exceptions are checked for
 * and handled.
 *
 * Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags = excepts & FE_ALL_EXCEPT;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	fesetexceptflag(&flags, excepts);
	__fwait();

	return 0;
}
226 
227 /*
228  * This function sets the floating-point status flags indicated by the argument
229  * `excepts' to the states stored in the object pointed to by `flagp'. It does
230  * NOT raise any floating-point exceptions, but only sets the state of the flags.
231  */
232 int
233 fesetexceptflag(const fexcept_t *flagp, int excepts)
234 {
235 	fenv_t env;
236 	uint32_t mxcsr;
237 	int ex;
238 
239 	_DIAGASSERT(flagp != NULL);
240 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
241 
242 	ex = excepts & FE_ALL_EXCEPT;
243 
244 	__fnstenv(&env);
245 	env.x87.status &= ~ex;
246 	env.x87.status |= *flagp & ex;
247 	__fldenv(env);
248 
249 	if (__HAS_SSE) {
250 		__stmxcsr(&mxcsr);
251 		mxcsr &= ~ex;
252 		mxcsr |= *flagp & ex;
253 		__ldmxcsr(mxcsr);
254 	}
255 
256 	/* Success */
257 	return (0);
258 }
259 
260 /*
261  * The fetestexcept() function determines which of a specified subset of the
262  * floating-point exception flags are currently set. The `excepts' argument
263  * specifies the floating-point status flags to be queried.
264  */
265 int
266 fetestexcept(int excepts)
267 {
268 	uint32_t mxcsr;
269 	uint16_t status;
270 	int ex;
271 
272 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
273 
274 	ex = excepts & FE_ALL_EXCEPT;
275 
276 	__fnstsw(&status);
277 	if (__HAS_SSE)
278 		__stmxcsr(&mxcsr);
279 	else
280 		mxcsr = 0;
281 
282 	return ((status | mxcsr) & ex);
283 }
284 
285 int
286 fegetround(void)
287 {
288 	uint16_t control;
289 
290 	/*
291 	 * We assume that the x87 and the SSE unit agree on the
292 	 * rounding mode.  Reading the control word on the x87 turns
293 	 * out to be about 5 times faster than reading it on the SSE
294 	 * unit on an Opteron 244.
295 	 */
296 	__fnstcw(&control);
297 
298 	return (control & __X87_ROUND_MASK);
299 }
300 
301 /*
302  * The fesetround() function shall establish the rounding direction represented
303  * by its argument round. If the argument is not equal to the value of a
304  * rounding direction macro, the rounding direction is not changed.
305  */
306 int
307 fesetround(int round)
308 {
309 	uint32_t mxcsr;
310 	uint16_t control;
311 
312 	if (round & ~__X87_ROUND_MASK) {
313 		/* Failure */
314 		return (-1);
315 	}
316 
317 	__fnstcw(&control);
318 	control &= ~__X87_ROUND_MASK;
319 	control |= round;
320 	__fldcw(control);
321 
322 	if (__HAS_SSE) {
323 		__stmxcsr(&mxcsr);
324 		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
325 		mxcsr |= round << __SSE_ROUND_SHIFT;
326 		__ldmxcsr(mxcsr);
327 	}
328 
329 	/* Success */
330 	return (0);
331 }
332 
333 /*
334  * The fegetenv() function attempts to store the current floating-point
335  * environment in the object pointed to by envp.
336  */
337 int
338 fegetenv(fenv_t *envp)
339 {
340 	uint32_t mxcsr;
341 
342 	_DIAGASSERT(flagp != NULL);
343 
344 	/*
345 	 * fnstenv masks all exceptions, so we need to restore the old control
346 	 * word to avoid this side effect.
347 	 */
348 	__fnstenv(envp);
349 	__fldcw(envp->x87.control);
350 	if (__HAS_SSE) {
351 		__stmxcsr(&mxcsr);
352 		envp->mxcsr = mxcsr;
353 	}
354 
355 	/* Success */
356 	return (0);
357 }
358 
359 /*
360  * The feholdexcept() function saves the current floating-point environment in
361  * the object pointed to by envp, clears the floating-point status flags, and
362  * then installs a non-stop (continue on floating-point exceptions) mode, if
363  * available, for all floating-point exceptions.
364  */
365 int
366 feholdexcept(fenv_t *envp)
367 {
368 	uint32_t mxcsr;
369 
370 	_DIAGASSERT(envp != NULL);
371 
372 	__fnstenv(envp);
373 	__fnclex();
374 	if (__HAS_SSE) {
375 		__stmxcsr(&mxcsr);
376 		envp->mxcsr = mxcsr;
377 		mxcsr &= ~FE_ALL_EXCEPT;
378 		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
379 		__ldmxcsr(mxcsr);
380 	}
381 
382 	/* Success */
383 	return (0);
384 }
385 
386 /*
387  * The fesetenv() function attempts to establish the floating-point environment
388  * represented by the object pointed to by envp. The argument `envp' points
389  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
390  * floating-point environment macro. The fesetenv() function does not raise
391  * floating-point exceptions, but only installs the state of the floating-point
392  * status flags represented through its argument.
393  */
394 int
395 fesetenv(const fenv_t *envp)
396 {
397 	fenv_t env;
398 
399 	_DIAGASSERT(envp != NULL);
400 
401 	/* Store the x87 floating-point environment */
402 	memset(&env, 0, sizeof(env));
403 	__fnstenv(&env);
404 
405 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
406 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
407 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
408 	memcpy(__fe_dfl_env.x87.others,
409 	       env.x87.others,
410 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
411 
412 	__fldenv(envp->x87);
413 	if (__HAS_SSE)
414 		__ldmxcsr(envp->mxcsr);
415 
416 	/* Success */
417 	return (0);
418 }
419 
420 /*
421  * The feupdateenv() function saves the currently raised floating-point
422  * exceptions in its automatic storage, installs the floating-point environment
423  * represented by the object pointed to by `envp', and then raises the saved
424  * floating-point exceptions. The argument `envp' shall point to an object set
425  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
426  * environment macro.
427  */
428 int
429 feupdateenv(const fenv_t *envp)
430 {
431 	fenv_t env;
432 	uint32_t mxcsr;
433 	uint16_t status;
434 
435 	_DIAGASSERT(envp != NULL);
436 
437 	/* Store the x87 floating-point environment */
438 	memset(&env, 0, sizeof(env));
439 	__fnstenv(&env);
440 
441 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
442 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
443 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
444 	memcpy(__fe_dfl_env.x87.others,
445 	       env.x87.others,
446 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
447 
448 	__fnstsw(&status);
449 	if (__HAS_SSE)
450 		__stmxcsr(&mxcsr);
451 	else
452 		mxcsr = 0;
453 	fesetenv(envp);
454 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
455 
456 	/* Success */
457 	return (0);
458 }
459 
460 /*
461  * The following functions are extentions to the standard
462  */
463 int
464 feenableexcept(int mask)
465 {
466 	uint32_t mxcsr, omask;
467 	uint16_t control;
468 
469 	mask &= FE_ALL_EXCEPT;
470 	__fnstcw(&control);
471 	if (__HAS_SSE)
472 		__stmxcsr(&mxcsr);
473 	else
474 		mxcsr = 0;
475 
476 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
477 	control &= ~mask;
478 	__fldcw(control);
479 	if (__HAS_SSE) {
480 		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
481 		__ldmxcsr(mxcsr);
482 	}
483 
484 	return (FE_ALL_EXCEPT & ~omask);
485 }
486 
487 int
488 fedisableexcept(int mask)
489 {
490 	uint32_t mxcsr, omask;
491 	uint16_t control;
492 
493 	mask &= FE_ALL_EXCEPT;
494 	__fnstcw(&control);
495 	if (__HAS_SSE)
496 		__stmxcsr(&mxcsr);
497 	else
498 		mxcsr = 0;
499 
500 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
501 	control |= mask;
502 	__fldcw(control);
503 	if (__HAS_SSE) {
504 		mxcsr |= mask << __SSE_EMASK_SHIFT;
505 		__ldmxcsr(mxcsr);
506 	}
507 
508 	return (FE_ALL_EXCEPT & ~omask);
509 }
510 
/*
 * fegetexcept(): return the set of currently enabled (unmasked)
 * exceptions, derived from the x87 control word.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	/*
	 * The x87 and SSE exception masks are assumed to be identical, so
	 * only the x87 control word is read.
	 */
	__fnstcw(&cw);

	/* A clear mask bit means the exception is enabled. */
	return ~cw & FE_ALL_EXCEPT;
}
524