/*
 * Copyright (C) 2005, 2007, 2009, 2011 Free Software Foundation, Inc.
 *
 * This file is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 3, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Under Section 7 of GPL version 3, you are granted additional
 * permissions described in the GCC Runtime Library Exception, version
 * 3.1, as published by the Free Software Foundation.
 *
 * You should have received a copy of the GNU General Public License and
 * a copy of the GCC Runtime Library Exception along with this program;
 * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#define MXCSR_DAZ (1 << 6)    /* Enable denormals are zero mode */
#define MXCSR_FTZ (1 << 15)   /* Enable flush to zero mode */

#ifndef __x86_64__
/* All 64-bit targets have SSE and DAZ;
   only check them explicitly for 32-bit ones.  */
#include "cpuid.h"
#endif

#if !defined __x86_64__ && defined __sun__ && defined __svr4__
#include <signal.h>
#include <ucontext.h>

/* Set to 1 by sigill_hdlr when the SSE probe instruction raised SIGILL.  */
static volatile sig_atomic_t sigill_caught;

/* SIGILL handler used while probing for usable SSE support.

   It has the three-argument SA_SIGINFO signature, so the context arrives
   as a `void *' and is converted to `ucontext_t *' here.  The handler
   records that the signal fired and resumes execution just past the
   faulting instruction.  */
static void
sigill_hdlr (int sig __attribute__((unused)),
             siginfo_t *sip __attribute__((unused)),
             void *ctx)
{
  ucontext_t *ucp = ctx;

  sigill_caught = 1;
  /* Set PC to the instruction after the faulting one to skip over it,
     otherwise we enter an infinite loop.  3 is the size of the movaps
     instruction.  */
  ucp->uc_mcontext.gregs[EIP] += 3;
  setcontext (ucp);
}
#endif

/* Constructor: turn on flush-to-zero (FTZ) and, where available,
   denormals-are-zero (DAZ) in the SSE control/status register MXCSR.

   On x86-64 both bits are set unconditionally.  On 32-bit targets the
   function first checks CPUID for SSE, on Solaris additionally verifies
   that an SSE instruction can actually be executed, and only sets DAZ
   when the MXCSR mask reported by FXSAVE allows it.  If any of these
   checks fails, MXCSR is left untouched.  */
static void __attribute__((constructor))
#ifndef __x86_64__
/* The i386 ABI only requires 4-byte stack alignment, so this is necessary
   to make sure the fxsave struct gets correct alignment.
   See PR27537 and PR28621.  */
__attribute__ ((force_align_arg_pointer))
#endif
set_fast_math (void)
{
#ifndef __x86_64__
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return;

  if (edx & bit_SSE)
    {
      unsigned int mxcsr;

#if defined __sun__ && defined __svr4__
      /* Solaris 2 before Solaris 9 4/04 cannot execute SSE instructions even
         if the CPU supports them.  Programs receive SIGILL instead, so check
         for that at runtime.  */
      struct sigaction act, oact;

      /* SA_SIGINFO selects the three-argument handler form, which must be
         installed through sa_sigaction rather than sa_handler.  */
      act.sa_sigaction = sigill_hdlr;
      sigemptyset (&act.sa_mask);
      /* Need to set SA_SIGINFO so a ucontext_t * is passed to the handler.  */
      act.sa_flags = SA_SIGINFO;

      /* Without the handler in place the probe below could be fatal, so
         leave MXCSR alone if it cannot be installed.  */
      if (sigaction (SIGILL, &act, &oact) != 0)
        return;

      /* We need a single SSE instruction here so the handler can safely skip
         over it.  */
      __asm__ volatile ("movaps %xmm0,%xmm0");

      sigaction (SIGILL, &oact, NULL);

      if (sigill_caught)
        return;
#endif /* __sun__ && __svr4__ */

      mxcsr = __builtin_ia32_stmxcsr () | MXCSR_FTZ;

      if (edx & bit_FXSAVE)
        {
          /* Check if DAZ is available.  */
          /* 512-byte FXSAVE area.  The 32-bit fields are declared as
             `unsigned int' so the layout does not depend on the width
             of `long'.  */
          struct
            {
              unsigned short int cwd;
              unsigned short int swd;
              unsigned short int twd;
              unsigned short int fop;
              unsigned int fip;
              unsigned int fcs;
              unsigned int foo;
              unsigned int fos;
              unsigned int mxcsr;
              unsigned int mxcsr_mask;
              unsigned int st_space[32];
              unsigned int xmm_space[32];
              unsigned int padding[56];
            } __attribute__ ((aligned (16))) fxsave;

          /* Start from an all-zero area so that mxcsr_mask reads as zero
             (i.e. no DAZ) unless fxsave stores a value there.  */
          __builtin_memset (&fxsave, 0, sizeof (fxsave));

          __asm__ volatile ("fxsave %0" : "=m" (fxsave) : "m" (fxsave));

          if (fxsave.mxcsr_mask & MXCSR_DAZ)
            mxcsr |= MXCSR_DAZ;
        }

      __builtin_ia32_ldmxcsr (mxcsr);
    }
#else
  unsigned int mxcsr = __builtin_ia32_stmxcsr ();
  mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
  __builtin_ia32_ldmxcsr (mxcsr);
#endif
}