1 /*
2 * $Id: fpuset.c,v 1.6 2007-11-08 16:34:30 dhmunro Exp $
3 * set up FPU to trap floating point exceptions
4 * - this is very non-portable, not covered by ANSI C, POSIX, or even C9X
5 * - if you port to a new platform (eg- Ultrix) please contact the author
6 */
7 /* Copyright (c) 2005, The Regents of the University of California.
8 * All rights reserved.
9 * This file is part of yorick (http://yorick.sourceforge.net).
10 * Read the accompanying LICENSE file for details.
11 */
12
13 #ifndef FROM_FPUTEST
14 # include "config.h"
15 # include "playu.h"
16 #else
17 extern void u_fpu_setup(int when);
18 #endif
19
20 /* when = -1 for initial call before setjmp
21 * 0 after each longjmp out of interrupt handler (after setjmp)
22 * 1 inside interrupt handler before signal() re-enables SIGFPE
23 */
24
25 #ifdef FPU_GCC_X86_64
26 # define FPU_GCC_X86
27 #endif
28
29 #if defined(FPU_DIGITAL) || defined(FPU_ALPHA_LINUX)
30
31 /* FPU_ALPHA_LINUX: see README.fpu */
32 /* man pages: exception_intro, ieee */
33 # ifdef FPU_DIGITAL
34 # include <machine/fpu.h>
35 # else
36 extern void ieee_set_fp_control(long);
37 # define IEEE_TRAP_ENABLE_INV 0x000002
38 # define IEEE_TRAP_ENABLE_DZE 0x000004
39 # define IEEE_TRAP_ENABLE_OVF 0x000008
40 # define IEEE_MAP_DMZ (1UL<<12)
41 # define IEEE_MAP_UMZ (1UL<<13)
42 # endif
43 void
u_fpu_setup(int when)44 u_fpu_setup(int when)
45 {
46 /* possibly should include IEEE_MAP_DMZ and IEEE_MAP_UMZ
47 * to map denorm inputs and underflowed outputs to zero
48 * --however, these apparently only have an effect for software
49 * completed operations (the hardware always maps underflows to zero)
50 */
51 if (when < 0) {
52 ieee_set_fp_control(IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |
53 IEEE_TRAP_ENABLE_OVF);
54 }
55 }
56
57 #elif defined(FPU_AIX)
58
59 /* man pages: fp_trap, fp_enable */
60 #include <fptrap.h>
61 void
u_fpu_setup(int when)62 u_fpu_setup(int when)
63 {
64 if (when) {
65 fp_trap(FP_TRAP_FASTMODE);
66 fp_enable(TRP_INVALID | TRP_DIV_BY_ZERO | TRP_OVERFLOW);
67 }
68 }
69
70 #elif defined(FPU_HPUX)
71
72 /* man pages: fpsetmask
73 * library: -lm */
74 /* HPUX turns off FP_X_* without this (_INCLUDE_HPUX_SOURCE) */
75 #ifndef _HPUX_SOURCE
76 #define _HPUX_SOURCE 1
77 #endif
78 #include <math.h>
79 void
u_fpu_setup(int when)80 u_fpu_setup(int when)
81 {
82 if (when <= 0) {
83 fpsetmask(FP_X_INV | FP_X_DZ | FP_X_OFL); /* 0x1c */
84 fpsetfastmode(1); /* fast underflows */
85 }
86 }
87
88 #elif defined(FPU_IRIX)
89
90 /* man pages: handle_sigfpes, note lethal TRAP_FPE environment variable
91 * library: -lfpe
92 * note: earlier versions used get_fpc_csr/set_fpc_csr?, sys/fpu.h */
93 #include <sigfpe.h>
94 void
u_fpu_setup(int when)95 u_fpu_setup(int when)
96 {
97 if (when < 0) {
98 extern void u_sigfpe(int sig); /* from handler.c (or fputest.c) */
99 handle_sigfpes(_ON, _EN_OVERFL|_EN_DIVZERO|_EN_INVALID,
100 (void (*)())0, _USER_HANDLER, (void (*)())&u_sigfpe);
101 }
102 }
103
104 #elif defined(FPU_SOLARIS)
105
106 /* man pages: fpsetmask
107 * Sun's -fnonstd compiler switch switches between __fnonstd.o
108 * and __fstd.o under Solaris, as far as I can tell. Use FPU_IGNORE
109 * if you do this. */
110 #include <ieeefp.h>
111 void
u_fpu_setup(int when)112 u_fpu_setup(int when)
113 {
114 if (when < 0) {
115 fpsetmask(FP_X_INV | FP_X_DZ | FP_X_OFL);
116 /* this doesn't set the "nonstandard arithmetic" bit, which prevents
117 * software emulation of IEEE gradual underflow
118 * -- apparently no way to do this in libc (see FPU_GCC_SPARC) */
119 }
120 }
121
122 #elif defined(FPU_SUN4)
123
124 /* man pages: ieee_handler
125 * nonstandard_arithmetic is undocumented, but rumored
126 * to be important to get rapid underflows
127 * library: -lsunmath (under /usr/lang hierarchy)
128 * may also be in -lm (standard libm)?
129 * note: libsunmath.a is provided by Sun only if you purchase their
130 * compilers; if you are trying to compile with gcc on a SPARC
131 * architecture, try FPU_GCC_SPARC
132 * Sun's -fnonstd compiler switch buggers crt1.o under SunOS 4,
133 * as far as I can tell. Use FPU_IGNORE if you do this
134 * (not possible with gcc?). */
void
u_fpu_setup(int when)
{
  extern void u_sigfpe(int sig);  /* from handler.c (or fputest.c) */

  /* only the initial call (when < 0) does any work */
  if (when >= 0) return;

  /* undocumented, but rumored to be important to get rapid underflows */
  nonstandard_arithmetic();
  /* deliver the "common" set of IEEE exceptions to u_sigfpe */
  ieee_handler("set","common", &u_sigfpe);
}
144
145 #elif defined(FPU_UNICOS)
146
147 /* delivers SIGFPE by default, this just arranges to trap on
148 * libm errors as well */
void
u_fpu_setup(int when)
{
  int libm_flag = -1;  /* value handed to libmset */

  /* SIGFPE is delivered by default; the initial call (when < 0)
   * just arranges to trap on libm errors as well */
  if (when >= 0) return;
  libmset(&libm_flag);
}
157
158 #elif defined(FPU_GCC_X86)
/* This branch is now preferred over FPU_GNU_FENV or FPU_GCC_I86 for
 * modern Intel Pentium and AMD machines.  The GNU fenv.h extension
 * will unmask interrupts properly, but provides no means for setting
 * the flush-to-zero and denormals-are-zero bits required for high
 * performance with the SSE/SSE2 floating point unit.
 */
165 # undef X86_PREFIX
166 # ifdef FPU_GCC_X86_64
167 # define X86_PREFIX "r"
168 # else
169 # define X86_PREFIX "e"
170 # endif
171
172 static unsigned int u_fpu_detect(void);
173 static unsigned int u_mxcsr_mask = 0;
174 static unsigned int u_fpu_features = 0x8000;
175 /* 0x001 x87 fpu on chip
176 * 0x002 mmx technology
177 * 0x004 fxsave/fxrestor instructions present
178 * 0x008 sse extensions
179 * 0x010 sse2 extensions
180 * 0x020 sse3 extensions
181 * 0x040 daz supported
182 */
183 static unsigned int
u_fpu_detect(void)184 u_fpu_detect(void)
185 {
186 unsigned int features = 0;
187 unsigned int x, y;
188 __asm __volatile ("pushf\n\t pop %%"X86_PREFIX"ax" : "=a" (x));
189 y = x; /* set x and y to original value of eflags */
190 x ^= 0x200000; /* flip bit 21, ID */
191 __asm __volatile ("push %%"X86_PREFIX"ax\n\t popf" : : "a" (x));
192 __asm __volatile ("pushf\n\t pop %%"X86_PREFIX"ax" : "=a" (x));
193 if (x ^ y) {
194 unsigned long ecx, edx;
195 /* this cpu has the cpuid instruction, restore original eflags */
196 __asm __volatile ("push %%"X86_PREFIX"ax\n\t popf" : : "a" (y));
197 /* get mmx, sse related feature bits from cpuid */
198 __asm __volatile ("mov %%"X86_PREFIX"bx, %%"X86_PREFIX"si \n\t"
199 "cpuid \n\t"
200 "xchg %%"X86_PREFIX"bx, %%"X86_PREFIX"si"
201 : "=c" (ecx), "=d" (edx) : "a" (1) : "si");
202 features = (edx & 1) | ((edx & 0x7800000) >> 22) | ((ecx & 1) >> 5);
203 if ((features & 0x004) && (features & 0x018)) {
204 /* get mxcsr_mask to find out if DAZ supported */
205 unsigned char pstate[528], *state;
206 int i;
207 for (state=pstate,i=0 ; i<528 ; i++) *state++ = '\0';
208 /* get a 16-byte aligned state buffer for fxsave */
209 state = pstate + 15;
210 state = ((state - (unsigned char *)0)&(~0xfL)) + (unsigned char *)0;
211 __asm __volatile ("fxsave %0" : : "m" (*state));
212 u_mxcsr_mask = *((unsigned int *)(state + 28));
213 if (!u_mxcsr_mask) u_mxcsr_mask = 0xffbf;
214 else if (u_mxcsr_mask & 0x40) features |= 0x040;
215 }
216 }
217 return features;
218 }
219
220 /* interrupt mask bits for x87 and sse (xmm) fpu's */
221 /* typical initial values: fctrl=0x037f mxcsr=0x1f80 */
222 #define U_FPU_X87CW 0x1372
223 #define U_FPU_MXCSR 0x9940
/* bit meaning            x87      sse      yes=mask=1, no=deliver=0
 * flush to zero          --       0x8000   yes
 * infinity control       0x1000   --       (for 287 coprocessor compatibility)
 * rounding control       0x0c00   0x6000   0
 * precision control      0x0300   --       (0x0300 means 64-bit extended)
 * precision mask         0x0020   0x1000   yes
 * underflow mask         0x0010   0x0800   yes
 * overflow mask          0x0008   0x0400   no
 * zero divide mask       0x0004   0x0200   no
 * denormal op mask       0x0002   0x0100   yes
 * invalid op mask        0x0001   0x0080   no
 * denormals are zero     --       0x0040   yes
 * sse exception bits 0x003f correspond to mask bits 0x1f80 (>>7)
 */
238 void
u_fpu_setup(int when)239 u_fpu_setup(int when)
240 {
241 if (u_fpu_features == 0x8000)
242 u_fpu_features = u_fpu_detect();
243 # if defined(__CYGWIN__)
244 __asm__ ("fclex" : : ); /* clear i87 fp exception bits */
245 if (u_mxcsr_mask) {
246 /* clear sse fp exception bits as a side-effect */
247 unsigned int mxcsr = U_FPU_MXCSR & u_mxcsr_mask;
248 __asm__ ("ldmxcsr %0" : : "m" (mxcsr));
249 }
250 if (when<0) {
251 # elif defined(__NeXT)
252 if (when<=0) {
253 # else
254 if (when) {
255 # endif
256 unsigned int fpucw = U_FPU_X87CW;
257 __asm__ ("fldcw %0" : : "m" (fpucw));
258 # if ! defined(__CYGWIN__)
259 if (u_mxcsr_mask) {
260 unsigned int mxcsr = U_FPU_MXCSR & u_mxcsr_mask;
261 __asm__ ("ldmxcsr %0" : : "m" (mxcsr));
262 /* get mxscr: __asm__ ("stmxcsr %0" : : "m" (mxcsr)); */
263 }
264 # endif
265 }
266 }
267
268 #elif defined(FPU_GNU_FENV)
269
270 /* GCC enhanced C9X fenv.h interface by adding feenableexcept */
271 #ifndef _GNU_SOURCE
272 #define _GNU_SOURCE 1
273 #endif
274 #include <fenv.h>
void
u_fpu_setup(int when)
{
  /* skip the call made with when > 0 */
  if (when > 0) return;

  /* unmask zero divide, overflow, and invalid operation */
  feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
}
281
282 #elif defined(FPU_GCC_I86)
283
284 /* see also: fpu_control.h or i386/fpu_control.h, __setfpucw function */
void
u_fpu_setup(int when)
{
  int reload;  /* nonzero if the x87 control word is to be loaded */

# if defined(__CYGWIN__)
  __asm__ ("fclex" : : );
  reload = (when < 0);
# elif defined(__NeXT)
  reload = (when <= 0);
# else
  reload = (when != 0);
# endif

  if (reload) {
    unsigned int fpucw = 0x1372;
# ifndef __CYGWIN__
    /* CygWin currently broken (20/Aug/04) so treat as FPU_IGNORE */
    __asm__ ("fldcw %0" : : "m" (fpucw));
# endif
  }
}
303
304 #elif defined(FPU_GCC_POWERPC)
305
/* Load the PowerPC FPSCR whenever when is nonzero.
 * The 8-byte image in tmp is loaded into fr0 by lfd, then copied to
 * all FPSCR fields by mtfsf 255.
 */
void
u_fpu_setup(int when)
{
  if (when) {
    unsigned int tmp[2] __attribute__ ((__aligned__(8)));
    tmp[0] = 0xFFF80000; /* More-or-less arbitrary; this is a QNaN. */
    tmp[1] = 0xd0;       /* presumably the FPSCR enable bits -- TODO confirm */
    /* lfd reads both words, so the memory operand must cover the
     * whole array; with "m" (*tmp) the compiler only knows tmp[0] is
     * read and is free to discard the store to tmp[1] */
    __asm__ ("lfd 0,%0; mtfsf 255,0"
             : : "m" (*(const unsigned int (*)[2])tmp) : "fr0");
  }
}
316
317 #elif defined(FPU_GCC_SPARC)
318
void
u_fpu_setup(int when)
{
  /* the 4 is nonstandard arithmetic bit */
  unsigned int fsr_image = 0xd400000;

  /* only the initial call (when < 0) loads the FSR */
  if (when >= 0) return;
  __asm__ ("ld %0,%%fsr" : : "m" (fsr_image));
}
327
328 #elif defined(FPU_GCC_M68K)
329
330 /* works on NeXT as well as m68k Linux */
void
u_fpu_setup(int when)
{
  /* skip the call made with when > 0 */
  if (when > 0) return;

  /* set OVFL and ZD bits;
   * includes bit to trap on signalling NaN (may affect libm behavior)
   *
   * alternative spelling with an operand:
   *   unsigned int fpucw = 0x7400;
   *   __asm__ volatile ("fmove%.l %0, %!" : : "dm" (fpucw));
   */
  asm("fmovel #0x7400,fpcr");
}
341
342 #elif defined(FPU_GCC_ARM)
343
void
u_fpu_setup(int when)
{
  /* includes bit to trap on signalling NaN (may affect libm behavior) */
  unsigned int fps_image = 0x70200;

  /* skip the call made with when > 0 */
  if (when > 0) return;
  __asm__ ("wfs %0" : : "r" (fps_image));
}
353
354 #elif defined(FPU_IGNORE)
355
/* FPU_IGNORE: leave the floating point unit exactly as it is */
void
u_fpu_setup(int when)
{
  (void)when;  /* intentionally unused */
}
360
361 #elif defined(FPU_MACOSX)
362
363 /* MacOS X 10.3.0, 10.3.1, and 10.3.2 (at least) shipped with
364 * broken libm (/usr/lib/libm.dylib), for which the functions
365 * sqrt, tanh, sinh cause incorrect SIGFPE for legal arguments
366 * when FPEs are enabled. See README.fpu for more information.
367 */
368
369 #include <architecture/ppc/fp_regs.h>
370
371 # ifdef FPU_MACOSX_10_1
372 /* for Darwin version 6.0 (MacOS X 10.2) FE0=FE1=1 initially
373 * for Darwin version 5.5 (MacOS X <=10.1) FE0=FE1=0 initially
374 * Darwin 5.5 resets MSR to FE0=FE1=0 after each SIGFPE
375 * A thread cannot set its own MSR, so we have to create a second thread
376 * to change our MSR to a value which permits FPE unmasking bits in SCR
377 * to have any effect (yuck).
378 */
379 #include <mach/mach.h>
380 #include <pthread.h>
381
382 static void *fpu_fpe_enable(void *arg);
383 #define FE0_MASK (1<<11)
384 #define FE1_MASK (1<<8)
385 /* FE0 FE1 exceptions enabled if either FE0 or FE1 set
386 * 0 0 -- floating-point exceptions disabled
387 * 0 1 -- floating-point imprecise nonrecoverable
388 * 1 0 -- floating-point imprecise recoverable
389 * 1 1 -- floating-point precise mode
390 */
391
392 /* a thread cannot get or set its own MSR bits */
393 static void *
394 fpu_fpe_enable(void *arg)
395 {
396 thread_t t = *(thread_t *)arg;
397 struct ppc_thread_state state;
398 unsigned int state_size = PPC_THREAD_STATE_COUNT;
399 if (thread_get_state(t, PPC_THREAD_STATE,
400 (natural_t *)&state, &state_size) == KERN_SUCCESS) {
401 state.srr1 |= FE1_MASK;
402 state.srr1 &= ~FE0_MASK;
403 thread_set_state(t, PPC_THREAD_STATE, (natural_t *)&state, state_size);
404 }
405 return 0;
406 }
407 # endif
408
409 void
410 u_fpu_setup(int when)
411 {
412 static volatile int looping = 0;
413 if (when) {
414 ppc_fp_scr_t r = get_fp_scr();
415 /* turn off exception bits to prevent immediate re-fault */
416 r.fx = r.fex = r.vx = r.ox = r.ux = r.zx = r.xx = r.vx_snan = r.vx_isi =
417 r.vx_idi = r.vx_zdz = r.vx_imz = r.vx_xvc = r.vx_cvi = r.vx_soft = 0;
418 /* rsvd2 is actually vx_sqrt, set by fsqrt instruction
419 * fsqrt is optional, not present on G4 and earlier Macs (but on G5)
420 */
421 r.rsvd2 = 0;
422 /* these only have to be set once, but may as well set anyway */
423 r.ve = 1; /* invalid */
424 r.oe = 1; /* overflow */
425 r.ue = 0; /* underflow */
426 r.ze = 1; /* zero divide */
427 r.xe = 0; /* inexact */
428 if (!looping) {
429 looping |= 1;
430 set_fp_scr(r);
431 looping &= ~1;
432 }
433 }
434 # ifdef FPU_MACOSX_10_1
435 if (when <= 0) {
436 thread_t self = mach_thread_self();
437 pthread_t enabler;
438 if (!looping) {
439 looping |= 2;
440 if (!pthread_create(&enabler, 0, fpu_fpe_enable, &self))
441 pthread_join(enabler, 0);
442 looping &= ~2;
443 }
444 }
445 # endif
446 looping = 0;
447 }
448
449 #else
450
451 #error <read play/unix/README.fpu for help>
452
453 #endif
454