1 /* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 /* compile-time and runtime tests for whether to use SSE instructions */
7 
8 #include "SSE.h"
9 
10 #ifdef HAVE_CPUID_H
11 // cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
12 #include <cpuid.h>
13 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
14 // MSVC 2005 or newer on x86-32 or x86-64
15 #include <intrin.h>
16 #endif
17 
18 namespace {
19 
// SSE.h has parallel #ifs which define MOZILLA_SSE_HAVE_CPUID_DETECTION.
// We can't declare these functions in the header file, however, because
// <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to
// include both SSE.h and <windows.h>.
24 
25 #ifdef HAVE_CPUID_H
26 
// Index of each CPUID output register within a four-element result array.
enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };

/**
 * Tests CPUID feature bits.
 *
 * @param level  CPUID leaf to query (sub-leaf 0 is always used).
 * @param reg    Which output register of that leaf to inspect.
 * @param bits   Mask of bits that must all be set.
 * @return true when |level| is supported and every bit of |bits| is set in
 *         |reg|; false otherwise.
 */
static bool
has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
{
  // Basic leaves (0x0...) and extended leaves (0x80000000...) each report
  // their own maximum, so ask for the range |level| belongs to.  Querying
  // only the basic range would reject every extended leaf, such as
  // 0x80000001 which carries the SSE4a bit.
  unsigned int max = __get_cpuid_max(level & 0x80000000u, NULL);
  if (level > max)
    return false;

  // Results go straight into the array so that no local shadows the
  // eax/ebx/ecx/edx enumerators above.
  unsigned int regs[4];
  __cpuid_count(level, 0, regs[0], regs[1], regs[2], regs[3]);
  return (regs[reg] & bits) == bits;
}
44 
45 #if !defined(MOZILLA_PRESUME_AVX)
/**
 * Reads extended control register |xcr| with the XGETBV instruction.
 *
 * @param xcr  Register index, passed to the instruction in ECX.
 * @return The 64-bit register value, assembled from EDX (high half) and
 *         EAX (low half).
 */
static uint64_t
xgetbv(uint32_t xcr)
{
    uint32_t low_half, high_half;

    // The instruction is emitted as raw opcode bytes (0f 01 d0 is XGETBV);
    // presumably so that assemblers lacking the mnemonic still accept it.
    __asm__ ( ".byte 0x0f, 0x01, 0xd0"
              : "=a"(low_half), "=d"(high_half)
              : "c"(xcr));

    uint64_t value = high_half;
    value <<= 32;
    value |= low_half;
    return value;
}
51 #endif
52 
53 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
54 
55 enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };
56 
57 static bool
58 has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
59 {
60   // Check that the level in question is supported.
61   int regs[4];
62   __cpuid(regs, level & 0x80000000u);
63   if (unsigned(regs[0]) < level)
64     return false;
65 
66   // "The __cpuid intrinsic clears the ECX register before calling the cpuid instruction."
67   __cpuid(regs, level);
68   return (unsigned(regs[reg]) & bits) == bits;
69 }
70 
71 #if !defined(MOZILLA_PRESUME_AVX)
72 static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); }
73 #endif
74 
75 #elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && (defined(__i386) || defined(__x86_64__))
76 
// Position of each CPUID output register in the array filled by moz_cpuid.
enum CPUIDRegister {
  eax = 0,
  ebx = 1,
  ecx = 2,
  edx = 3
};
78 
/**
 * Executes CPUID for leaf |InfoType| with sub-leaf 0 and stores the
 * resulting EAX, EBX, ECX and EDX, in that order, into CPUInfo[0..3].
 *
 * The base register (EBX/RBX) is exchanged with ESI/RSI before the
 * instruction and exchanged back afterwards, so it holds its original value
 * on exit; ESI/RSI is listed as clobbered instead.
 *
 * Notes on the operand lists:
 *  - Because this is extended asm (it has operand lists), literal register
 *    names in the template must be written with "%%".
 *  - CPUID overwrites EAX, so it is declared read-write ("+a") rather than
 *    input-only.
 *  - The template stores through |CPUInfo|, hence the "memory" clobber.
 *  - With an output operand the statement is no longer implicitly volatile,
 *    so it is marked volatile explicitly to keep it from being discarded.
 */
#ifdef __i386
static void
moz_cpuid(int CPUInfo[4], int InfoType)
{
  __asm__ __volatile__ (
    "xchg %%esi, %%ebx\n"
    "xor %%ecx, %%ecx\n" // ecx is the sub-leaf (we only ever need 0)
    "cpuid\n"
    "movl %%eax, (%%edi)\n"
    "movl %%ebx, 4(%%edi)\n"
    "movl %%ecx, 8(%%edi)\n"
    "movl %%edx, 12(%%edi)\n"
    "xchg %%esi, %%ebx\n"
    : "+a"(InfoType) // %eax
    : "D"(CPUInfo) // %edi
    : "%ecx", "%edx", "%esi", "memory"
  );
}
#else
static void
moz_cpuid(int CPUInfo[4], int InfoType)
{
  __asm__ __volatile__ (
    "xchg %%rsi, %%rbx\n"
    "xor %%ecx, %%ecx\n" // ecx is the sub-leaf (we only ever need 0)
    "cpuid\n"
    "movl %%eax, (%%rdi)\n"
    "movl %%ebx, 4(%%rdi)\n"
    "movl %%ecx, 8(%%rdi)\n"
    "movl %%edx, 12(%%rdi)\n"
    "xchg %%rsi, %%rbx\n"
    : "+a"(InfoType) // %eax
    : "D"(CPUInfo) // %rdi
    : "%ecx", "%edx", "%rsi", "memory"
  );
}
#endif
118 
119 static bool
120 has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
121 {
122   // Check that the level in question is supported.
123   volatile int regs[4];
124   moz_cpuid((int *)regs, level & 0x80000000u);
125   if (unsigned(regs[0]) < level)
126     return false;
127 
128   moz_cpuid((int *)regs, level);
129   return (unsigned(regs[reg]) & bits) == bits;
130 }
131 
132 #endif // end CPUID declarations
133 
134 } // namespace
135 
136 namespace mozilla {
137 
138 namespace sse_private {
139 
140 #if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
141 
142 #if !defined(MOZILLA_PRESUME_MMX)
143   bool mmx_enabled = has_cpuid_bits(1u, edx, (1u<<23));
144 #endif
145 
146 #if !defined(MOZILLA_PRESUME_SSE)
147   bool sse_enabled = has_cpuid_bits(1u, edx, (1u<<25));
148 #endif
149 
150 #if !defined(MOZILLA_PRESUME_SSE2)
151   bool sse2_enabled = has_cpuid_bits(1u, edx, (1u<<26));
152 #endif
153 
154 #if !defined(MOZILLA_PRESUME_SSE3)
155   bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u<<0));
156 #endif
157 
158 #if !defined(MOZILLA_PRESUME_SSSE3)
159   bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u<<9));
160 #endif
161 
162 #if !defined(MOZILLA_PRESUME_SSE4A)
163   bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u<<6));
164 #endif
165 
166 #if !defined(MOZILLA_PRESUME_SSE4_1)
167   bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u<<19));
168 #endif
169 
170 #if !defined(MOZILLA_PRESUME_SSE4_2)
171   bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u<<20));
172 #endif
173 
174 #if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2)
has_avx()175   static bool has_avx()
176   {
177 #if defined(MOZILLA_PRESUME_AVX)
178       return true;
179 #else
180       const unsigned AVX = 1u << 28;
181       const unsigned OSXSAVE = 1u << 27;
182       const unsigned XSAVE = 1u << 26;
183 
184       const unsigned XMM_STATE = 1u << 1;
185       const unsigned YMM_STATE = 1u << 2;
186       const unsigned AVX_STATE = XMM_STATE | YMM_STATE;
187 
188       return has_cpuid_bits(1u, ecx, AVX | OSXSAVE | XSAVE) &&
189           // ensure the OS supports XSAVE of YMM registers
190           (xgetbv(0) & AVX_STATE) == AVX_STATE;
191 #endif // MOZILLA_PRESUME_AVX
192   }
193 #endif // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2
194 
195 #if !defined(MOZILLA_PRESUME_AVX)
196   bool avx_enabled = has_avx();
197 #endif
198 
199 #if !defined(MOZILLA_PRESUME_AVX2)
200   bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u<<5));
201 #endif
202 
203 #endif
204 
205 } // namespace sse_private
206 } // namespace mozilla
207