1 /*
2  * $Id: fpuset.c,v 1.6 2007-11-08 16:34:30 dhmunro Exp $
3  * set up FPU to trap floating point exceptions
4  * - this is very non-portable, not covered by ANSI C, POSIX, or even C9X
5  * - if you port to a new platform (eg- Ultrix) please contact the author
6  */
7 /* Copyright (c) 2005, The Regents of the University of California.
8  * All rights reserved.
9  * This file is part of yorick (http://yorick.sourceforge.net).
10  * Read the accompanying LICENSE file for details.
11  */
12 
13 #ifndef FROM_FPUTEST
14 # include "config.h"
15 # include "playu.h"
16 #else
17 extern void u_fpu_setup(int when);
18 #endif
19 
20 /* when = -1 for initial call before setjmp
21  *         0 after each longjmp out of interrupt handler (after setjmp)
22  *         1 inside interrupt handler before signal() re-enables SIGFPE
23  */
24 
25 #ifdef FPU_GCC_X86_64
26 # define FPU_GCC_X86
27 #endif
28 
29 #if defined(FPU_DIGITAL) || defined(FPU_ALPHA_LINUX)
30 
31 /* FPU_ALPHA_LINUX: see README.fpu */
32 /* man pages: exception_intro, ieee */
33 # ifdef FPU_DIGITAL
34 #  include <machine/fpu.h>
35 # else
36    extern void ieee_set_fp_control(long);
37 #  define IEEE_TRAP_ENABLE_INV 0x000002
38 #  define IEEE_TRAP_ENABLE_DZE 0x000004
39 #  define IEEE_TRAP_ENABLE_OVF 0x000008
40 #  define IEEE_MAP_DMZ         (1UL<<12)
41 #  define IEEE_MAP_UMZ         (1UL<<13)
42 # endif
43 void
u_fpu_setup(int when)44 u_fpu_setup(int when)
45 {
46   /* possibly should include IEEE_MAP_DMZ and IEEE_MAP_UMZ
47    * to map denorm inputs and underflowed outputs to zero
48    * --however, these apparently only have an effect for software
49    * completed operations (the hardware always maps underflows to zero)
50    */
51   if (when < 0) {
52     ieee_set_fp_control(IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |
53                         IEEE_TRAP_ENABLE_OVF);
54   }
55 }
56 
57 #elif defined(FPU_AIX)
58 
59 /* man pages: fp_trap, fp_enable */
60 #include <fptrap.h>
61 void
u_fpu_setup(int when)62 u_fpu_setup(int when)
63 {
64   if (when) {
65     fp_trap(FP_TRAP_FASTMODE);
66     fp_enable(TRP_INVALID | TRP_DIV_BY_ZERO | TRP_OVERFLOW);
67   }
68 }
69 
70 #elif defined(FPU_HPUX)
71 
72 /* man pages: fpsetmask
73  * library: -lm */
74 /* HPUX turns off FP_X_* without this (_INCLUDE_HPUX_SOURCE) */
75 #ifndef _HPUX_SOURCE
76 #define _HPUX_SOURCE 1
77 #endif
78 #include <math.h>
79 void
u_fpu_setup(int when)80 u_fpu_setup(int when)
81 {
82   if (when <= 0) {
83     fpsetmask(FP_X_INV | FP_X_DZ | FP_X_OFL);  /* 0x1c */
84     fpsetfastmode(1);    /* fast underflows */
85   }
86 }
87 
88 #elif defined(FPU_IRIX)
89 
90 /* man pages: handle_sigfpes, note lethal TRAP_FPE environment variable
91  * library: -lfpe
92  * note: earlier versions used get_fpc_csr/set_fpc_csr?, sys/fpu.h */
93 #include <sigfpe.h>
94 void
u_fpu_setup(int when)95 u_fpu_setup(int when)
96 {
97   if (when < 0) {
98     extern void u_sigfpe(int sig);  /* from handler.c (or fputest.c) */
99     handle_sigfpes(_ON, _EN_OVERFL|_EN_DIVZERO|_EN_INVALID,
100                    (void (*)())0, _USER_HANDLER, (void (*)())&u_sigfpe);
101   }
102 }
103 
104 #elif defined(FPU_SOLARIS)
105 
106 /* man pages: fpsetmask
107  *    Sun's -fnonstd compiler switch switches between __fnonstd.o
108  *      and __fstd.o under Solaris, as far as I can tell.  Use FPU_IGNORE
109  *        if you do this.  */
110 #include <ieeefp.h>
111 void
u_fpu_setup(int when)112 u_fpu_setup(int when)
113 {
114   if (when < 0) {
115     fpsetmask(FP_X_INV | FP_X_DZ | FP_X_OFL);
116     /* this doesn't set the "nonstandard arithmetic" bit, which prevents
117      * software emulation of IEEE gradual underflow
118      * -- apparently no way to do this in libc (see FPU_GCC_SPARC) */
119   }
120 }
121 
122 #elif defined(FPU_SUN4)
123 
124 /* man pages: ieee_handler
125  *               nonstandard_arithmetic is undocumented, but rumored
126  *               to be important to get rapid underflows
127  * library: -lsunmath (under /usr/lang hierarchy)
128  *          may also be in -lm (standard libm)?
129  *   note: libsunmath.a is provided by Sun only if you purchase their
130  *         compilers; if you are trying to compile with gcc on a SPARC
131  *         architecture, try FPU_GCC_SPARC
132  *    Sun's -fnonstd compiler switch buggers crt1.o under SunOS 4,
133  *      as far as I can tell.  Use FPU_IGNORE if you do this
134  *      (not possible with gcc?).  */
/* SunOS 4: switch to nonstandard arithmetic and install u_sigfpe as
 * the handler for the "common" exception set.  Only the initial call
 * (when < 0) does anything.
 */
void
u_fpu_setup(int when)
{
  extern void u_sigfpe(int sig);  /* from handler.c (or fputest.c) */

  if (when >= 0) return;

  nonstandard_arithmetic();
  ieee_handler("set","common", &u_sigfpe);
}
144 
145 #elif defined(FPU_UNICOS)
146 
147 /* delivers SIGFPE by default, this just arranges to trap on
148  * libm errors as well */
/* UNICOS: SIGFPE is already delivered by default, so the initial call
 * (when < 0) only passes a flag of -1 to libmset in order to trap on
 * libm errors as well.  Other calls are no-ops.
 */
void
u_fpu_setup(int when)
{
  int libm_flag = -1;

  if (when >= 0) return;

  libmset(&libm_flag);
}
157 
158 #elif defined(FPU_GCC_X86)
159 /* This branch is now preferred over FPU_GNU_FENV or FPU_GNU_I86 for
160  * modern Intel Pentium and AMD machines.  The GNU fenv.h extension
161  * will unmask interrupts properly, but provides no means for setting
162  * the flush-to-zero and denormals-are-zero bits required for high
163  * performance with the SSE/SSE2 floating point unit.
164  */
165 # undef X86_PREFIX
166 # ifdef FPU_GCC_X86_64
167 #  define X86_PREFIX "r"
168 # else
169 #  define X86_PREFIX "e"
170 # endif
171 
172 static unsigned int u_fpu_detect(void);
173 static unsigned int u_mxcsr_mask = 0;
174 static unsigned int u_fpu_features = 0x8000;
175 /* 0x001   x87 fpu on chip
176  * 0x002   mmx technology
 * 0x004   fxsave/fxrstor instructions present
178  * 0x008   sse extensions
179  * 0x010   sse2 extensions
180  * 0x020   sse3 extensions
181  * 0x040   daz supported
182  */
183 static unsigned int
u_fpu_detect(void)184 u_fpu_detect(void)
185 {
186   unsigned int features = 0;
187   unsigned int x, y;
188   __asm __volatile ("pushf\n\t pop %%"X86_PREFIX"ax" : "=a" (x));
189   y = x;          /* set x and y to original value of eflags */
190   x ^= 0x200000;  /* flip bit 21, ID */
191   __asm __volatile ("push %%"X86_PREFIX"ax\n\t popf" : : "a" (x));
192   __asm __volatile ("pushf\n\t pop %%"X86_PREFIX"ax" : "=a" (x));
193   if (x ^ y) {
194     unsigned long ecx, edx;
195     /* this cpu has the cpuid instruction, restore original eflags */
196     __asm __volatile ("push %%"X86_PREFIX"ax\n\t popf" : : "a" (y));
197     /* get mmx, sse related feature bits from cpuid */
198     __asm __volatile ("mov %%"X86_PREFIX"bx, %%"X86_PREFIX"si \n\t"
199                       "cpuid \n\t"
200                       "xchg %%"X86_PREFIX"bx, %%"X86_PREFIX"si"
201                       : "=c" (ecx), "=d" (edx) : "a" (1) : "si");
202     features = (edx & 1) | ((edx & 0x7800000) >> 22) | ((ecx & 1) >> 5);
203     if ((features & 0x004) && (features & 0x018)) {
204       /* get mxcsr_mask to find out if DAZ supported */
205       unsigned char pstate[528], *state;
206       int i;
207       for (state=pstate,i=0 ; i<528 ; i++) *state++ = '\0';
208       /* get a 16-byte aligned state buffer for fxsave */
209       state = pstate + 15;
210       state = ((state - (unsigned char *)0)&(~0xfL)) + (unsigned char *)0;
211       __asm __volatile ("fxsave %0" : : "m" (*state));
212       u_mxcsr_mask = *((unsigned int *)(state + 28));
213       if (!u_mxcsr_mask) u_mxcsr_mask = 0xffbf;
214       else if (u_mxcsr_mask & 0x40) features |= 0x040;
215     }
216   }
217   return features;
218 }
219 
220 /* interrupt mask bits for x87 and sse (xmm) fpu's */
221 /* typical initial values: fctrl=0x037f  mxcsr=0x1f80 */
222 #define U_FPU_X87CW 0x1372
223 #define U_FPU_MXCSR 0x9940
224 /*   bit meaning         x87      sse      yes=mask=1, no=deliver=0
225  * flush to zero          --     0x8000       yes
226  * infinity control     0x1000     --    (for 287 coprocessor compatibility)
227  * rounding control     0x0c00   0x6000        0
 * precision control    0x0300     --    (0x0300 means 64-bit extended)
229  * precision mask       0x0020   0x1000       yes
230  * underflow mask       0x0010   0x0800       yes
231  * overflow mask        0x0008   0x0400       no
232  * zero divide mask     0x0004   0x0200       no
233  * denormal op mask     0x0002   0x0100       yes
234  * invalid op mask      0x0001   0x0080       no
235  * denormals are zero     --     0x0040       yes
236  *   sse exception bits 0x003f correspond to mask bits 0x1f80  (>>7)
237  */
238 void
u_fpu_setup(int when)239 u_fpu_setup(int when)
240 {
241   if (u_fpu_features == 0x8000)
242     u_fpu_features = u_fpu_detect();
243 # if defined(__CYGWIN__)
244   __asm__ ("fclex" : : );  /* clear i87 fp exception bits */
245   if (u_mxcsr_mask) {
246     /* clear sse fp exception bits as a side-effect */
247     unsigned int mxcsr = U_FPU_MXCSR & u_mxcsr_mask;
248     __asm__ ("ldmxcsr %0" : : "m" (mxcsr));
249   }
250   if (when<0) {
251 # elif defined(__NeXT)
252   if (when<=0) {
253 # else
254   if (when) {
255 # endif
256     unsigned int fpucw = U_FPU_X87CW;
257     __asm__ ("fldcw %0" : : "m" (fpucw));
258 # if ! defined(__CYGWIN__)
259     if (u_mxcsr_mask) {
260       unsigned int mxcsr = U_FPU_MXCSR & u_mxcsr_mask;
261       __asm__ ("ldmxcsr %0" : : "m" (mxcsr));
262       /* get mxscr: __asm__ ("stmxcsr %0" : : "m" (mxcsr)); */
263     }
264 # endif
265   }
266 }
267 
268 #elif defined(FPU_GNU_FENV)
269 
270 /* GCC enhanced C9X fenv.h interface by adding feenableexcept */
271 #ifndef _GNU_SOURCE
272 #define _GNU_SOURCE 1
273 #endif
274 #include <fenv.h>
/* GNU fenv extension: enable trapping of divide by zero, overflow and
 * invalid operation on the initial call and after each longjmp
 * (when <= 0).  The call from inside the interrupt handler is a no-op.
 */
void
u_fpu_setup(int when)
{
  if (when > 0) return;

  feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
}
281 
282 #elif defined(FPU_GCC_I86)
283 
284 /* see also: fpu_control.h or i386/fpu_control.h, __setfpucw function */
/* GCC on i86 (x87 only): load the x87 control word 0x1372.
 *   CygWin:  only clears the x87 exception bits; the control word is
 *            never loaded
 *   NeXT:    loads the control word when when <= 0
 *   others:  loads the control word when when != 0
 */
void
u_fpu_setup(int when)
{
# if defined(__CYGWIN__)
  /* CygWin currently broken (20/Aug/04) so treat as FPU_IGNORE */
  __asm__ ("fclex" : : );
  (void)when;
# else
  int reload;
  unsigned int x87_cw = 0x1372;
#  if defined(__NeXT)
  reload = (when <= 0);
#  else
  reload = (when != 0);
#  endif
  if (reload)
    __asm__ ("fldcw %0" : : "m" (x87_cw));
# endif
}
303 
304 #elif defined(FPU_GCC_POWERPC)
305 
/* GCC on PowerPC: for any nonzero when, build an 8-byte aligned
 * two-word image, load it into fr0 and move it to the FP status and
 * control register with mtfsf.  when == 0 is a no-op.
 */
void
u_fpu_setup(int when)
{
  unsigned int image[2] __attribute__ ((__aligned__(8)));

  if (when == 0) return;

  image[0] = 0xFFF80000; /* More-or-less arbitrary; this is a QNaN. */
  image[1] = 0xd0;       /* value given to mtfsf */
  __asm__ ("lfd 0,%0; mtfsf 255,0" : : "m" (*image) : "fr0");
}
316 
317 #elif defined(FPU_GCC_SPARC)
318 
/* GCC on SPARC: on the initial call (when < 0) load the value
 * 0xd400000 into the %fsr register.  Other calls are no-ops.
 */
void
u_fpu_setup(int when)
{
  unsigned int fsr_value = 0xd400000;  /* the 4 is nonstandard arithmetic bit */

  if (when >= 0) return;

  __asm__ ("ld %0,%%fsr" : : "m" (fsr_value));
}
327 
328 #elif defined(FPU_GCC_M68K)
329 
330 /* works on NeXT as well as m68k Linux */
/* GCC on m68k (works on NeXT as well as m68k Linux): write 0x7400 to
 * fpcr on the initial call and after each longjmp (when <= 0).  The
 * call from inside the interrupt handler is a no-op.
 */
void
u_fpu_setup(int when)
{
  if (when > 0) return;

  /* set OVFL and ZD bits; the value also includes the bit to trap on
   * signalling NaN (may affect libm behavior)
   */
  asm("fmovel     #0x7400,fpcr");
  /* alternative form:
   * unsigned int fpucw = 0x7400;
   * __asm__ volatile ("fmove%.l %0, %!" : : "dm" (fpucw)); */
}
341 
342 #elif defined(FPU_GCC_ARM)
343 
/* GCC on ARM: write 0x70200 to the FP status register with wfs on the
 * initial call and after each longjmp (when <= 0).  The call from
 * inside the interrupt handler is a no-op.
 */
void
u_fpu_setup(int when)
{
  /* includes bit to trap on signalling NaN (may affect libm behavior) */
  unsigned int status = 0x70200;

  if (when > 0) return;

  __asm__ ("wfs %0" : : "r" (status));
}
353 
354 #elif defined(FPU_IGNORE)
355 
/* FPU_IGNORE: leave the FPU configuration alone, whatever when is. */
void
u_fpu_setup(int when)
{
  (void)when;  /* intentionally unused */
}
360 
361 #elif defined(FPU_MACOSX)
362 
363 /* MacOS X 10.3.0, 10.3.1, and 10.3.2 (at least) shipped with
364  * broken libm (/usr/lib/libm.dylib), for which the functions
365  * sqrt, tanh, sinh cause incorrect SIGFPE for legal arguments
366  * when FPEs are enabled.  See README.fpu for more information.
367  */
368 
369 #include <architecture/ppc/fp_regs.h>
370 
371 # ifdef FPU_MACOSX_10_1
372 /* for Darwin version 6.0 (MacOS X 10.2) FE0=FE1=1 initially
373  * for Darwin version 5.5 (MacOS X <=10.1) FE0=FE1=0 initially
374  * Darwin 5.5 resets MSR to FE0=FE1=0 after each SIGFPE
375  * A thread cannot set its own MSR, so we have to create a second thread
376  * to change our MSR to a value which permits FPE unmasking bits in SCR
377  * to have any effect (yuck).
378  */
379 #include <mach/mach.h>
380 #include <pthread.h>
381 
382 static void *fpu_fpe_enable(void *arg);
383 #define FE0_MASK (1<<11)
384 #define FE1_MASK (1<<8)
385 /* FE0  FE1   exceptions enabled if either FE0 or FE1 set
386  *  0    0    -- floating-point exceptions disabled
387  *  0    1    -- floating-point imprecise nonrecoverable
388  *  1    0    -- floating-point imprecise recoverable
389  *  1    1    -- floating-point precise mode
390  */
391 
392 /* a thread cannot get or set its own MSR bits */
393 static void *
394 fpu_fpe_enable(void *arg)
395 {
396   thread_t t = *(thread_t *)arg;
397   struct ppc_thread_state state;
398   unsigned int state_size = PPC_THREAD_STATE_COUNT;
399   if (thread_get_state(t, PPC_THREAD_STATE,
400                        (natural_t *)&state, &state_size) == KERN_SUCCESS) {
401     state.srr1 |= FE1_MASK;
402     state.srr1 &= ~FE0_MASK;
403     thread_set_state(t, PPC_THREAD_STATE, (natural_t *)&state, state_size);
404   }
405   return 0;
406 }
407 # endif
408 
409 void
410 u_fpu_setup(int when)
411 {
412   static volatile int looping = 0;
413   if (when) {
414     ppc_fp_scr_t r = get_fp_scr();
415     /* turn off exception bits to prevent immediate re-fault */
416     r.fx = r.fex = r.vx = r.ox = r.ux = r.zx = r.xx = r.vx_snan = r.vx_isi =
417       r.vx_idi = r.vx_zdz = r.vx_imz = r.vx_xvc = r.vx_cvi = r.vx_soft = 0;
418     /* rsvd2 is actually vx_sqrt, set by fsqrt instruction
419      * fsqrt is optional, not present on G4 and earlier Macs (but on G5)
420      */
421     r.rsvd2 = 0;
422     /* these only have to be set once, but may as well set anyway */
423     r.ve = 1;  /* invalid */
424     r.oe = 1;  /* overflow */
425     r.ue = 0;  /* underflow */
426     r.ze = 1;  /* zero divide */
427     r.xe = 0;  /* inexact */
428     if (!looping) {
429       looping |= 1;
430       set_fp_scr(r);
431       looping &= ~1;
432     }
433   }
434 # ifdef FPU_MACOSX_10_1
435   if (when <= 0) {
436     thread_t self = mach_thread_self();
437     pthread_t enabler;
438     if (!looping) {
439       looping |= 2;
440       if (!pthread_create(&enabler, 0, fpu_fpe_enable, &self))
441         pthread_join(enabler, 0);
442       looping &= ~2;
443     }
444   }
445 # endif
446   looping = 0;
447 }
448 
449 #else
450 
451 #error <read play/unix/README.fpu for help>
452 
453 #endif
454