1 ///////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 1997, Industrial Light & Magic, a division of Lucas
4 // Digital Ltd. LLC
5 //
6 // All rights reserved.
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
10 // met:
11 // *       Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // *       Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 // *       Neither the name of Industrial Light & Magic nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 //
33 ///////////////////////////////////////////////////////////////////////////
34 
35 
36 //------------------------------------------------------------------------
37 //
38 //	Functions to control floating point exceptions.
39 //
40 //------------------------------------------------------------------------
41 
42 #include "IexMathFpu.h"
43 
44 #include <stdint.h>
45 #include <IlmBaseConfig.h>
46 #include <stdio.h>
47 
//
// Debug tracing switch.  Change the 0 below to 1 to make debug(x)
// stream x to std::cout and flush; as written, debug(x) expands to
// nothing.
//
#if 0
    #include <iostream>
    #define debug(x) (std::cout << x << std::flush)
#else
    #define debug(x)
#endif
54 
55 #if defined(HAVE_UCONTEXT_H) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86))
56 
57 #include <ucontext.h>
58 #include <signal.h>
59 #include <iostream>
60 #include <stdint.h>
61 
62 
63 IEX_INTERNAL_NAMESPACE_SOURCE_ENTER
64 
65 
66 
67 namespace FpuControl
68 {
69 
70 //-------------------------------------------------------------------
71 //
72 //    Modern x86 processors and all AMD64 processors have two
73 //    sets of floating-point control/status registers: cw and sw
74 //    for legacy x87 stack-based arithmetic, and mxcsr for
75 //    SIMD arithmetic.  When setting exception masks or checking
76 //    for exceptions, we must set/check all relevant registers,
77 //    since applications may contain code that uses either FP
78 //    model.
79 //
80 //    These functions handle both FP models for x86 and AMD64.
81 //
82 //-------------------------------------------------------------------
83 
//-------------------------------------------------------------------
//
//    restoreControlRegs() reloads the x87 control word and the
//    mxcsr from the user context that was passed to a signal
//    handler.  If clearExceptions is true, the exception flag
//    bits of the restored mxcsr are cleared.
//
//-------------------------------------------------------------------

void restoreControlRegs (const ucontext_t & ucon, bool clearExceptions = false);


//------------------------------------------------------------
//
//    Exception masks.
//
//    Each *_EXC constant names one exception.  In a mask, a
//    bit that is 1 means the exception is masked, and a bit
//    that is 0 means the exception is enabled.
//
//    setExceptionMask() installs a new mask and returns the
//    previous one.  If the 'exceptions' pointer is non-null,
//    the FPU exception bits are stored through it.
//    getExceptionMask() returns the current mask.
//
//------------------------------------------------------------

const int INVALID_EXC   = 0x01;
const int DENORMAL_EXC  = 0x02;
const int DIVZERO_EXC   = 0x04;
const int OVERFLOW_EXC  = 0x08;
const int UNDERFLOW_EXC = 0x10;
const int INEXACT_EXC   = 0x20;
const int ALL_EXC       = INVALID_EXC   | DENORMAL_EXC |
                          DIVZERO_EXC   | OVERFLOW_EXC |
                          UNDERFLOW_EXC | INEXACT_EXC;

int setExceptionMask (int mask, int * exceptions = 0);
int getExceptionMask ();


//---------------------------------------------
//
//    getExceptions() returns the exception
//    bits currently set in the FPU;
//    clearExceptions() resets them.
//
//---------------------------------------------

int  getExceptions ();
void clearExceptions ();
129 
130 
131 //------------------------------------------------------------------
132 //
133 //    Everything below here is implementation.  Do not use these
134 //    constants or functions in your applications or libraries.
135 //    This is not the code you're looking for.  Move along.
136 //
137 //    Optimization notes -- on a Pentium 4, at least, it appears
138 //    to be faster to get the mxcsr first and then the cw; and to
139 //    set the cw first and then the mxcsr.  Also, it seems to
140 //    be faster to clear the sw exception bits after setting
141 //    cw and mxcsr.
142 //
143 //------------------------------------------------------------------
144 
//
// Store the x87 status word to memory with fnstsw and return it.
//
static inline uint16_t
getSw ()
{
    uint16_t statusWord;
    asm volatile ("fnstsw %0" : "=m" (statusWord));
    return statusWord;
}
152 
//
// Load cw into the x87 control word with fldcw.
//
static inline void
setCw (uint16_t cw)
{
    const uint16_t controlWord = cw;
    asm volatile ("fldcw %0" : : "m" (controlWord));
}
158 
//
// Store the x87 control word to memory with fnstcw and return it.
//
static inline uint16_t
getCw ()
{
    uint16_t controlWord;
    asm volatile ("fnstcw %0" : "=m" (controlWord));
    return controlWord;
}
166 
//
// Load mxcsr into the SIMD control/status register with ldmxcsr.
// If clearExceptions is true, bits 0-5 (the exception flags) are
// zeroed before the value is loaded; all other bits are passed
// through unchanged.
//
static inline void
setMxcsr (uint32_t mxcsr, bool clearExceptions)
{
    if (clearExceptions)
        mxcsr &= 0xffffffc0;

    asm volatile ("ldmxcsr %0" : : "m" (mxcsr));
}
173 
//
// Store the SIMD control/status register to memory with stmxcsr
// and return it.
//
static inline uint32_t
getMxcsr ()
{
    uint32_t controlStatus;
    asm volatile ("stmxcsr %0" : "=m" (controlStatus));
    return controlStatus;
}
181 
182 static inline int
calcMask(uint16_t cw,uint32_t mxcsr)183 calcMask (uint16_t cw, uint32_t mxcsr)
184 {
185     //
186     // Hopefully, if the user has been using FpuControl functions,
187     // the masks are the same, but just in case they're not, we
188     // AND them together to report the proper subset of the masks.
189     //
190 
191     return (cw & ALL_EXC) & ((mxcsr >> 7) & ALL_EXC);
192 }
193 
194 inline int
setExceptionMask(int mask,int * exceptions)195 setExceptionMask (int mask, int * exceptions)
196 {
197     uint16_t cw = getCw ();
198     uint32_t mxcsr = getMxcsr ();
199 
200     if (exceptions)
201 	*exceptions = (mxcsr & ALL_EXC) | (getSw () & ALL_EXC);
202 
203     int oldmask = calcMask (cw, mxcsr);
204 
205     //
206     // The exception constants are chosen very carefully so that
207     // we can do a simple mask and shift operation to insert
208     // them into the control words.  The mask operation is for
209     // safety, in case the user accidentally set some other
210     // bits in the exception mask.
211     //
212 
213     mask &= ALL_EXC;
214     cw = (cw & ~ALL_EXC) | mask;
215     mxcsr = (mxcsr & ~(ALL_EXC << 7)) | (mask << 7);
216 
217     setCw (cw);
218     setMxcsr (mxcsr, false);
219 
220     return oldmask;
221 }
222 
223 inline int
getExceptionMask()224 getExceptionMask ()
225 {
226     uint32_t mxcsr = getMxcsr ();
227     uint16_t cw = getCw ();
228     return calcMask (cw, mxcsr);
229 }
230 
231 inline int
getExceptions()232 getExceptions ()
233 {
234     return (getMxcsr () | getSw ()) & ALL_EXC;
235 }
236 
237 void
clearExceptions()238 clearExceptions ()
239 {
240     uint32_t mxcsr = getMxcsr () & 0xffffffc0;
241     asm volatile ("ldmxcsr %0\n"
242 		  "fnclex"
243 		  : : "m" (mxcsr) );
244 }
245 
//
// If the fpe was raised during a float-to-int cast performed on the
// x87, the saved control word holds the wrong rounding mode and
// possibly the wrong precision.  The control word is therefore not
// restored verbatim: the rounding mode is forced to 'nearest' and
// the precision to double extended.
//
//    bits 10-11   rounding mode, 00 == round to nearest
//    bits  8-9    precision,     11 == double extended (80-bit)
//
const uint16_t cwRestoreMask = 0xf0ff;    // ~((3 << 10) | (3 << 8))
const uint16_t cwRestoreVal  = 0x0300;    //   (0 << 10) | (3 << 8)
256 
257 
258 #ifdef ILMBASE_HAVE_CONTROL_REGISTER_SUPPORT
259 
260 inline void
restoreControlRegs(const ucontext_t & ucon,bool clearExceptions)261 restoreControlRegs (const ucontext_t & ucon, bool clearExceptions)
262 {
263     setCw ((ucon.uc_mcontext.fpregs->cwd & cwRestoreMask) | cwRestoreVal);
264     setMxcsr (ucon.uc_mcontext.fpregs->mxcsr, clearExceptions);
265 }
266 
267 #else
268 
269 //
270 // Ugly, the mxcsr isn't defined in GNU libc ucontext_t, but
271 // it's passed to the signal handler by the kernel.  Use
272 // the kernel's version of the ucontext to get it, see
273 // <asm/sigcontext.h>
274 //
275 
276 #include <asm/sigcontext.h>
277 
278 inline void
restoreControlRegs(const ucontext_t & ucon,bool clearExceptions)279 restoreControlRegs (const ucontext_t & ucon, bool clearExceptions)
280 {
281     setCw ((ucon.uc_mcontext.fpregs->cw & cwRestoreMask) | cwRestoreVal);
282 
283     _fpstate * kfp = reinterpret_cast<_fpstate *> (ucon.uc_mcontext.fpregs);
284     setMxcsr (kfp->magic == 0 ? kfp->mxcsr : 0, clearExceptions);
285 }
286 
287 #endif
288 
289 } // namespace FpuControl
290 
291 
292 namespace {
293 
294 volatile FpExceptionHandler fpeHandler = 0;
295 
296 extern "C" void
catchSigFpe(int sig,siginfo_t * info,ucontext_t * ucon)297 catchSigFpe (int sig, siginfo_t *info, ucontext_t *ucon)
298 {
299     debug ("catchSigFpe (sig = "<< sig << ", ...)\n");
300 
301     FpuControl::restoreControlRegs (*ucon, true);
302 
303     if (fpeHandler == 0)
304 	return;
305 
306     if (info->si_code == SI_USER)
307     {
308 	fpeHandler (0, "Floating-point exception, caused by "
309 		       "a signal sent from another process.");
310 	return;
311     }
312 
313     if (sig == SIGFPE)
314     {
315 	switch (info->si_code)
316 	{
317 	  //
318 	  // IEEE 754 floating point exceptions:
319 	  //
320 
321 	  case FPE_FLTDIV:
322 	    fpeHandler (IEEE_DIVZERO, "Floating-point division by zero.");
323 	    return;
324 
325 	  case FPE_FLTOVF:
326 	    fpeHandler (IEEE_OVERFLOW, "Floating-point overflow.");
327 	    return;
328 
329 	  case FPE_FLTUND:
330 	    fpeHandler (IEEE_UNDERFLOW, "Floating-point underflow.");
331 	    return;
332 
333 	  case FPE_FLTRES:
334 	    fpeHandler (IEEE_INEXACT, "Inexact floating-point result.");
335 	    return;
336 
337 	  case FPE_FLTINV:
338 	    fpeHandler (IEEE_INVALID, "Invalid floating-point operation.");
339 	    return;
340 
341 	  //
342 	  // Other arithmetic exceptions which can also
343 	  // be trapped by the operating system:
344 	  //
345 
346 	  case FPE_INTDIV:
347 	    fpeHandler (0, "Integer division by zero.");
348 	    break;
349 
350 	  case FPE_INTOVF:
351 	    fpeHandler (0, "Integer overflow.");
352 	    break;
353 
354 	  case FPE_FLTSUB:
355 	    fpeHandler (0, "Subscript out of range.");
356 	    break;
357 	}
358     }
359 
360     fpeHandler (0, "Floating-point exception.");
361 }
362 
363 } // namespace
364 
365 void
setFpExceptions(int when)366 setFpExceptions (int when)
367 {
368     int mask = FpuControl::ALL_EXC;
369 
370     if (when & IEEE_OVERFLOW)
371 	mask &= ~FpuControl::OVERFLOW_EXC;
372     if (when & IEEE_UNDERFLOW)
373 	mask &= ~FpuControl::UNDERFLOW_EXC;
374     if (when & IEEE_DIVZERO)
375 	mask &= ~FpuControl::DIVZERO_EXC;
376     if (when & IEEE_INEXACT)
377 	mask &= ~FpuControl::INEXACT_EXC;
378     if (when & IEEE_INVALID)
379 	mask &= ~FpuControl::INVALID_EXC;
380 
381     //
382     // The Linux kernel apparently sometimes passes
383     // incorrect si_info to signal handlers unless
384     // the exception flags are cleared.
385     //
386     // XXX is this still true on 2.4+ kernels?
387     //
388 
389     FpuControl::setExceptionMask (mask);
390     FpuControl::clearExceptions ();
391 }
392 
393 
394 int
fpExceptions()395 fpExceptions ()
396 {
397     int mask = FpuControl::getExceptionMask ();
398 
399     int when = 0;
400 
401     if (!(mask & FpuControl::OVERFLOW_EXC))
402 	when |= IEEE_OVERFLOW;
403     if (!(mask & FpuControl::UNDERFLOW_EXC))
404 	when |= IEEE_UNDERFLOW;
405     if (!(mask & FpuControl::DIVZERO_EXC))
406 	when |= IEEE_DIVZERO;
407     if (!(mask & FpuControl::INEXACT_EXC))
408 	when |= IEEE_INEXACT;
409     if (!(mask & FpuControl::INVALID_EXC))
410 	when |= IEEE_INVALID;
411 
412     return when;
413 }
414 
415 void
handleExceptionsSetInRegisters()416 handleExceptionsSetInRegisters()
417 {
418     if (fpeHandler == 0)
419 	return;
420 
421     int mask = FpuControl::getExceptionMask ();
422 
423     int exc = FpuControl::getExceptions();
424 
425     if (!(mask & FpuControl::DIVZERO_EXC) && (exc & FpuControl::DIVZERO_EXC))
426     {
427         fpeHandler(IEEE_DIVZERO, "Floating-point division by zero.");
428         return;
429     }
430 
431     if (!(mask & FpuControl::OVERFLOW_EXC) && (exc & FpuControl::OVERFLOW_EXC))
432     {
433         fpeHandler(IEEE_OVERFLOW, "Floating-point overflow.");
434         return;
435     }
436 
437     if (!(mask & FpuControl::UNDERFLOW_EXC) && (exc & FpuControl::UNDERFLOW_EXC))
438     {
439         fpeHandler(IEEE_UNDERFLOW, "Floating-point underflow.");
440         return;
441     }
442 
443     if (!(mask & FpuControl::INEXACT_EXC) && (exc & FpuControl::INEXACT_EXC))
444     {
445         fpeHandler(IEEE_INEXACT, "Inexact floating-point result.");
446         return;
447     }
448 
449     if (!(mask & FpuControl::INVALID_EXC) && (exc & FpuControl::INVALID_EXC))
450     {
451         fpeHandler(IEEE_INVALID, "Invalid floating-point operation.");
452         return;
453     }
454 }
455 
456 
457 void
setFpExceptionHandler(FpExceptionHandler handler)458 setFpExceptionHandler (FpExceptionHandler handler)
459 {
460     if (fpeHandler == 0)
461     {
462 	struct sigaction action;
463 	sigemptyset (&action.sa_mask);
464 	action.sa_flags = SA_SIGINFO | SA_NOMASK;
465 	action.sa_sigaction = (void (*) (int, siginfo_t *, void *)) catchSigFpe;
466 	action.sa_restorer = 0;
467 
468 	sigaction (SIGFPE, &action, 0);
469     }
470 
471     fpeHandler = handler;
472 }
473 
474 
475 IEX_INTERNAL_NAMESPACE_SOURCE_EXIT
476 
477 
478 #else
479 
480 #include <signal.h>
481 #include <assert.h>
482 
483 IEX_INTERNAL_NAMESPACE_SOURCE_ENTER
484 
485 
486 namespace
487 {
488 	volatile FpExceptionHandler fpeHandler = 0;
489 	void fpExc_(int x)
490 	{
491 	    if (fpeHandler != 0)
492 	    {
493 		fpeHandler(x, "");
494 	    }
495 	    else
496 	    {
497 		assert(0 != "Floating point exception");
498 	    }
499 	}
500 }
501 
//
// Enabling hardware floating-point exceptions is not implemented
// for this configuration; the request is ignored.
//
void
setFpExceptions (int /* when */)
{
}
506 
507 
508 void
509 setFpExceptionHandler (FpExceptionHandler handler)
510 {
511     // improve floating point exception handling nanoscopically above "nothing at all"
512     fpeHandler = handler;
513     signal(SIGFPE, fpExc_);
514 }
515 
//
// No hardware floating-point exceptions can be enabled in this
// configuration, so the set of enabled exceptions is always empty.
//
int
fpExceptions ()
{
    const int noExceptionsEnabled = 0;
    return noExceptionsEnabled;
}
521 
//
// Checking the FPU registers for raised exceptions is not
// implemented on this platform; the call does nothing.
//
void
handleExceptionsSetInRegisters ()
{
}
527 
528 IEX_INTERNAL_NAMESPACE_SOURCE_EXIT
529 
530 #endif
531