xref: /openbsd/lib/libm/arch/i387/fenv.c (revision 898184e3)
1 /*	$OpenBSD: fenv.c,v 1.3 2012/12/05 23:20:02 deraadt Exp $	*/
2 /*	$NetBSD: fenv.c,v 1.3 2010/08/01 06:34:38 taca Exp $	*/
3 
4 /*-
5  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/sysctl.h>
32 #include <machine/cpu.h>
33 #include <machine/npx.h>
34 
35 #include <fenv.h>
36 
37 /*
38  * The following constant represents the default floating-point environment
39  * (that is, the one installed at program startup) and has type pointer to
40  * const-qualified fenv_t.
41  *
42  * It can be used as an argument to the functions within the <fenv.h> header
43  * that manage the floating-point environment, namely fesetenv() and
44  * feupdateenv().
45  *
46  * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
47  * RESERVED.
48  */
49 fenv_t __fe_dfl_env = {
50 	{
51 		0xffff0000 | __INITIAL_NPXCW__,	/* Control word register */
52 		0xffff0000,			/* Status word register */
53 		0xffffffff,			/* Tag word register */
54 		{
55 			0x00000000,
56 			0x00000000,
57 			0x00000000,
58 			0xffff0000
59 		}
60 	},
61 	__INITIAL_MXCSR__		/* MXCSR register */
62 };
63 
64 /*
65  * Test for SSE support on this processor.
66  *
67  * We need to use ldmxcsr/stmxcsr to get correct results if any part
68  * of the program was compiled to use SSE floating-point, but we can't
69  * use SSE on older processors.
70  *
71  * In order to do so, we need to query the processor capabilities via the CPUID
72  * instruction. We can make it even simpler though, by querying the machdep.sse
73  * sysctl.
74  */
75 static int __HAS_SSE = 0;
76 
77 static void __test_sse(void) __attribute__ ((constructor));
78 
79 static void __test_sse(void)
80 {
81 	size_t oldlen = sizeof(__HAS_SSE);
82 	int mib[2] = { CTL_MACHDEP, CPU_SSE };
83 	int rv;
84 
85 	rv = sysctl(mib, 2, &__HAS_SSE, &oldlen, NULL, 0);
86 	if (rv == -1)
87 		__HAS_SSE = 0;
88 }
89 
90 /*
91  * The feclearexcept() function clears the supported floating-point exceptions
92  * represented by `excepts'.
93  */
94 int
95 feclearexcept(int excepts)
96 {
97 	fenv_t fenv;
98 	unsigned int mxcsr;
99 
100 	excepts &= FE_ALL_EXCEPT;
101 
102 	/* Store the current x87 floating-point environment */
103 	__asm__ __volatile__ ("fnstenv %0" : "=m" (fenv));
104 
105 	/* Clear the requested floating-point exceptions */
106 	fenv.__x87.__status &= ~excepts;
107 
108 	/* Load the x87 floating-point environent */
109 	__asm__ __volatile__ ("fldenv %0" : : "m" (fenv));
110 
111 	/* Same for SSE environment */
112 	if (__HAS_SSE) {
113 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
114 		mxcsr &= ~excepts;
115 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
116 	}
117 
118 	return (0);
119 }
120 
121 /*
122  * The fegetexceptflag() function stores an implementation-defined
123  * representation of the states of the floating-point status flags indicated by
124  * the argument excepts in the object pointed to by the argument flagp.
125  */
126 int
127 fegetexceptflag(fexcept_t *flagp, int excepts)
128 {
129 	unsigned short status;
130 	unsigned int mxcsr = 0;
131 
132 	excepts &= FE_ALL_EXCEPT;
133 
134 	/* Store the current x87 status register */
135 	__asm__ __volatile__ ("fnstsw %0" : "=am" (status));
136 
137 	/* Store the MXCSR register */
138 	if (__HAS_SSE)
139 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
140 
141 	/* Store the results in flagp */
142 	*flagp = (status | mxcsr) & excepts;
143 
144 	return (0);
145 }
146 
/*
 * feraiseexcept(): raise the supported floating-point exceptions named by
 * `excepts'.  Bits outside FE_ALL_EXCEPT are ignored.
 *
 * The standard explicitly allows us to execute an instruction that has the
 * exception as a side effect, but we choose to manipulate the status
 * registers directly, through fesetexceptflag().
 *
 * Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	excepts &= FE_ALL_EXCEPT;

	/*
	 * Pass a genuine fexcept_t object rather than casting &excepts:
	 * reading an int through a fexcept_t pointer is only correct when
	 * both types have the same size and representation.  The masked
	 * value always fits, since it is limited to FE_ALL_EXCEPT.
	 */
	flags = excepts;
	fesetexceptflag(&flags, excepts);

	/*
	 * NOTE(review): presumably the fwait is what lets a now-pending,
	 * unmasked x87 exception be delivered right away -- confirm against
	 * the x87 exception synchronization rules.
	 */
	__asm__ __volatile__ ("fwait");

	return (0);
}
167 
168 /*
169  * This function sets the floating-point status flags indicated by the argument
170  * `excepts' to the states stored in the object pointed to by `flagp'. It does
171  * NOT raise any floating-point exceptions, but only sets the state of the flags.
172  */
173 int
174 fesetexceptflag(const fexcept_t *flagp, int excepts)
175 {
176 	fenv_t fenv;
177 	unsigned int mxcsr;
178 
179 	excepts &= FE_ALL_EXCEPT;
180 
181 	/* Store the current x87 floating-point environment */
182 	__asm__ __volatile__ ("fnstenv %0" : "=m" (fenv));
183 
184 	/* Set the requested status flags */
185 	fenv.__x87.__status &= ~excepts;
186 	fenv.__x87.__status |= *flagp & excepts;
187 
188 	/* Load the x87 floating-point environent */
189 	__asm__ __volatile__ ("fldenv %0" : : "m" (fenv));
190 
191 	/* Same for SSE environment */
192 	if (__HAS_SSE) {
193 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
194 		mxcsr &= ~excepts;
195 		mxcsr |= *flagp & excepts;
196 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
197 	}
198 
199 	return (0);
200 }
201 
202 /*
203  * The fetestexcept() function determines which of a specified subset of the
204  * floating-point exception flags are currently set. The `excepts' argument
205  * specifies the floating-point status flags to be queried.
206  */
207 int
208 fetestexcept(int excepts)
209 {
210 	unsigned short status;
211 	unsigned int mxcsr = 0;
212 
213 	excepts &= FE_ALL_EXCEPT;
214 
215 	/* Store the current x87 status register */
216 	__asm__ __volatile__ ("fnstsw %0" : "=am" (status));
217 
218 	/* Store the MXCSR register state */
219 	if (__HAS_SSE)
220 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
221 
222 	return ((status | mxcsr) & excepts);
223 }
224 
225 /*
226  * The fegetround() function gets the current rounding direction.
227  */
228 int
229 fegetround(void)
230 {
231 	unsigned short control;
232 
233 	/*
234 	 * We assume that the x87 and the SSE unit agree on the
235 	 * rounding mode.  Reading the control word on the x87 turns
236 	 * out to be about 5 times faster than reading it on the SSE
237 	 * unit on an Opteron 244.
238 	 */
239 	__asm__ __volatile__ ("fnstcw %0" : "=m" (control));
240 
241 	return (control & _X87_ROUND_MASK);
242 }
243 
244 /*
245  * The fesetround() function establishes the rounding direction represented by
246  * its argument `round'. If the argument is not equal to the value of a rounding
247  * direction macro, the rounding direction is not changed.
248  */
249 int
250 fesetround(int round)
251 {
252 	unsigned short control;
253 	unsigned int mxcsr;
254 
255 	/* Check whether requested rounding direction is supported */
256 	if (round & ~_X87_ROUND_MASK)
257 		return (-1);
258 
259 	/* Store the current x87 control word register */
260 	__asm__ __volatile__ ("fnstcw %0" : "=m" (control));
261 
262 	/* Set the rounding direction */
263 	control &= ~_X87_ROUND_MASK;
264 	control |= round;
265 
266 	/* Load the x87 control word register */
267 	__asm__ __volatile__ ("fldcw %0" : : "m" (control));
268 
269 	/* Same for the SSE environment */
270 	if (__HAS_SSE) {
271 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
272 		mxcsr &= ~(_X87_ROUND_MASK << _SSE_ROUND_SHIFT);
273 		mxcsr |= round << _SSE_ROUND_SHIFT;
274 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
275 	}
276 
277 	return (0);
278 }
279 
280 /*
281  * The fegetenv() function attempts to store the current floating-point
282  * environment in the object pointed to by envp.
283  */
284 int
285 fegetenv(fenv_t *envp)
286 {
287 	/* Store the current x87 floating-point environment */
288 	__asm__ __volatile__ ("fnstenv %0" : "=m" (*envp));
289 
290 	/* Store the MXCSR register state */
291 	if (__HAS_SSE)
292 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
293 
294 	/*
295 	 * When an FNSTENV instruction is executed, all pending exceptions are
296 	 * essentially lost (either the x87 FPU status register is cleared or
297 	 * all exceptions are masked).
298 	 *
299 	 * 8.6 X87 FPU EXCEPTION SYNCHRONIZATION -
300 	 * Intel(R) 64 and IA-32 Architectures Softare Developer's Manual - Vol1
301 	 */
302 	__asm__ __volatile__ ("fldcw %0" : : "m" (envp->__x87.__control));
303 
304 	return (0);
305 }
306 
307 /*
308  * The feholdexcept() function saves the current floating-point environment
309  * in the object pointed to by envp, clears the floating-point status flags, and
310  * then installs a non-stop (continue on floating-point exceptions) mode, if
311  * available, for all floating-point exceptions.
312  */
313 int
314 feholdexcept(fenv_t *envp)
315 {
316 	unsigned int mxcsr;
317 
318 	/* Store the current x87 floating-point environment */
319 	__asm__ __volatile__ ("fnstenv %0" : "=m" (*envp));
320 
321 	/* Clear all exception flags in FPU */
322 	__asm__ __volatile__ ("fnclex");
323 
324 	if (__HAS_SSE) {
325 		/* Store the MXCSR register state */
326 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
327 
328 		/* Clear exception flags in MXCSR */
329 		mxcsr = envp->__mxcsr;
330 		mxcsr &= ~FE_ALL_EXCEPT;
331 
332 		/* Mask all exceptions */
333 		mxcsr |= FE_ALL_EXCEPT << _SSE_MASK_SHIFT;
334 
335 		/* Store the MXCSR register */
336 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
337 	}
338 
339 	return (0);
340 }
341 
342 /*
343  * The fesetenv() function attempts to establish the floating-point environment
344  * represented by the object pointed to by envp. The argument `envp' points
345  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
346  * floating-point environment macro. The fesetenv() function does not raise
347  * floating-point exceptions, but only installs the state of the floating-point
348  * status flags represented through its argument.
349  */
350 int
351 fesetenv(const fenv_t *envp)
352 {
353 	/* Load the x87 floating-point environent */
354 	__asm__ __volatile__ ("fldenv %0" : : "m" (*envp));
355 
356 	/* Store the MXCSR register */
357 	if (__HAS_SSE)
358 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (envp->__mxcsr));
359 
360 	return (0);
361 }
362 
363 /*
364  * The feupdateenv() function saves the currently raised floating-point
365  * exceptions in its automatic storage, installs the floating-point environment
366  * represented by the object pointed to by `envp', and then raises the saved
367  * floating-point exceptions. The argument `envp' shall point to an object set
368  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
369  * environment macro.
370  */
371 int
372 feupdateenv(const fenv_t *envp)
373 {
374 	unsigned short status;
375 	unsigned int mxcsr = 0;
376 
377 	/* Store the x87 status register */
378 	__asm__ __volatile__ ("fnstsw %0" : "=am" (status));
379 
380 	/* Store the MXCSR register */
381 	if (__HAS_SSE)
382 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
383 
384 	/* Install new floating-point environment */
385 	fesetenv(envp);
386 
387 	/* Raise any previously accumulated exceptions */
388 	feraiseexcept(status | mxcsr);
389 
390 	return (0);
391 }
392 
/*
 * The following functions are extensions to the standard
 */
396 int
397 feenableexcept(int mask)
398 {
399 	unsigned int mxcsr = 0, omask;
400 	unsigned short control;
401 
402 	mask &= FE_ALL_EXCEPT;
403 
404 	__asm__ __volatile__ ("fnstcw %0" : "=m" (control));
405 	if (__HAS_SSE)
406 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
407 
408 	omask = ~(control | (mxcsr >> _SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
409 	control &= ~mask;
410 	__asm__ __volatile__ ("fldcw %0" : : "m" (control));
411 
412 	if (__HAS_SSE) {
413 		mxcsr &= ~(mask << _SSE_MASK_SHIFT);
414 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
415 	}
416 
417 	return (omask);
418 }
419 
420 int
421 fedisableexcept(int mask)
422 {
423 	unsigned int mxcsr = 0, omask;
424 	unsigned short control;
425 
426 	mask &= FE_ALL_EXCEPT;
427 
428 	__asm__ __volatile__ ("fnstcw %0" : "=m" (control));
429 	if (__HAS_SSE)
430 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
431 
432 	omask = ~(control | (mxcsr >> _SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
433 	control |= mask;
434 	__asm__ __volatile__ ("fldcw %0" : : "m" (control));
435 
436 	if (__HAS_SSE) {
437 		mxcsr |= mask << _SSE_MASK_SHIFT;
438 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
439 	}
440 
441 	return (omask);
442 }
443 
/*
 * fegetexcept(): return the set of floating-point exceptions that are
 * currently enabled (unmasked).
 *
 * Only the x87 control word is read; the exception masks of the x87 and the
 * SSE unit are assumed to be the same.
 */
int
fegetexcept(void)
{
	unsigned short cw;

	__asm__ __volatile__ ("fnstcw %0" : "=m" (cw));

	/* A set mask bit means "disabled", so report the inverse */
	return (~cw & FE_ALL_EXCEPT);
}
457