1 /******************************************************************************
2 *
3 * Project: CPL - Common Portability Library
4 * Purpose: CPU features detection
5 * Author: Even Rouault, <even dot rouault at spatialys dot com>
6 *
7 ******************************************************************************
8 * Copyright (c) 2016, Even Rouault <even dot rouault at spatialys dot com>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a
11 * copy of this software and associated documentation files (the "Software"),
12 * to deal in the Software without restriction, including without limitation
13 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 * and/or sell copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included
18 * in all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 ****************************************************************************/
28
29 #include "cpl_port.h"
30 #include "cpl_string.h"
31 #include "cpl_cpu_features.h"
32
33 CPL_CVSID("$Id: cpl_cpu_features.cpp 044a11ee4d018f526644c342c3f483ddd1e6d688 2020-12-15 20:56:11 +0100 Even Rouault $")
34
35 //! @cond Doxygen_Suppress
36
// Bit positions tested in the ECX value returned by CPL_CPUID(1, ...).
#define CPUID_SSSE3_ECX_BIT     9
#define CPUID_OSXSAVE_ECX_BIT   27
#define CPUID_AVX_ECX_BIT       28

// Bit position tested in the EDX value returned by CPL_CPUID(1, ...).
#define CPUID_SSE_EDX_BIT       25

// Masks applied to the low 32 bits returned by XGETBV: bit 1 is the XMM
// state flag and bit 2 is the YMM state flag.
#define BIT_XMM_STATE           (1 << 1)
#define BIT_YMM_STATE           (1 << 2)

// Index of each register inside the 4-int array filled by CPL_CPUID().
#define REG_EAX                 0
#define REG_EBX                 1
#define REG_ECX                 2
#define REG_EDX                 3
50
#if defined(__GNUC__)

// GCC_CPUID(leaf, out_a, out_b, out_c, out_d): runs CPUID with EAX = leaf and
// stores the resulting EAX, EBX, ECX and EDX values into the four output
// lvalues. EBX/RBX is exchanged with operand 1 before and after the CPUID
// instruction instead of being listed as an output itself: the EBX result
// ends up in operand 1 and the previous content of EBX/RBX is put back.
#if defined(__x86_64)
#define GCC_CPUID(leaf, out_a, out_b, out_c, out_d)                     \
    __asm__ ("xchgq %%rbx, %q1\n"                                       \
             "cpuid\n"                                                  \
             "xchgq %%rbx, %q1"                                         \
             : "=a" (out_a), "=r" (out_b), "=c" (out_c), "=d" (out_d)   \
             : "0" (leaf))
#else
#define GCC_CPUID(leaf, out_a, out_b, out_c, out_d)                     \
    __asm__ ("xchgl %%ebx, %1\n"                                        \
             "cpuid\n"                                                  \
             "xchgl %%ebx, %1"                                          \
             : "=a" (out_a), "=r" (out_b), "=c" (out_c), "=d" (out_d)   \
             : "0" (leaf))
#endif

// CPL_CPUID(leaf, regs): fills regs[REG_EAX..REG_EDX] (an array of 4 ints)
// with the output of CPUID for the requested leaf.
#define CPL_CPUID(leaf, regs) \
    GCC_CPUID(leaf, (regs)[0], (regs)[1], (regs)[2], (regs)[3])

#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))

// Visual C++ on x86 / x64: rely on the compiler intrinsic.
#include <intrin.h>
#define CPL_CPUID(leaf, regs) __cpuid(regs, leaf)

#endif
76
77 #if defined(HAVE_SSE_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSE)
78
79 /************************************************************************/
80 /* CPLHaveRuntimeSSE() */
81 /************************************************************************/
82
CPLHaveRuntimeSSE()83 bool CPLHaveRuntimeSSE()
84 {
85 int cpuinfo[4] = { 0, 0, 0, 0 };
86 CPL_CPUID(1, cpuinfo);
87 return (cpuinfo[REG_EDX] & (1 << CPUID_SSE_EDX_BIT)) != 0;
88 }
89
90 #endif
91
92 #if defined(HAVE_SSSE3_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSSE3)
93
94 /************************************************************************/
95 /* CPLHaveRuntimeSSSE3() */
96 /************************************************************************/
97
CPLDetectSSSE3()98 static inline bool CPLDetectSSSE3()
99 {
100 int cpuinfo[4] = { 0, 0, 0, 0 };
101 CPL_CPUID(1, cpuinfo);
102 return (cpuinfo[REG_ECX] & (1 << CPUID_SSSE3_ECX_BIT)) != 0;
103 }
104
105 #if defined(__GNUC__) && !defined(DEBUG)
106 bool bCPLHasSSSE3 = false;
107 static void CPLHaveRuntimeSSSE3Initialize() __attribute__ ((constructor));
CPLHaveRuntimeSSSE3Initialize()108 static void CPLHaveRuntimeSSSE3Initialize()
109 {
110 bCPLHasSSSE3 = CPLDetectSSSE3();
111 }
112 #else
CPLHaveRuntimeSSSE3()113 bool CPLHaveRuntimeSSSE3()
114 {
115 #ifdef DEBUG
116 if( !CPLTestBool(CPLGetConfigOption("GDAL_USE_SSSE3", "YES")) )
117 return false;
118 #endif
119 return CPLDetectSSSE3();
120 }
121 #endif
122
123 #endif // defined(HAVE_SSSE3_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSSE3)
124
125 #if defined(HAVE_AVX_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX)
126
127 /************************************************************************/
128 /* CPLHaveRuntimeAVX() */
129 /************************************************************************/
130
131 #if defined(__GNUC__)
132
CPLDetectRuntimeAVX()133 static bool CPLDetectRuntimeAVX()
134 {
135 int cpuinfo[4] = { 0, 0, 0, 0 };
136 CPL_CPUID(1, cpuinfo);
137
138 // Check OSXSAVE feature.
139 if( (cpuinfo[REG_ECX] & (1 << CPUID_OSXSAVE_ECX_BIT)) == 0 )
140 {
141 return false;
142 }
143
144 // Check AVX feature.
145 if( (cpuinfo[REG_ECX] & (1 << CPUID_AVX_ECX_BIT)) == 0 )
146 {
147 return false;
148 }
149
150 // Issue XGETBV and check the XMM and YMM state bit.
151 unsigned int nXCRLow;
152 unsigned int nXCRHigh;
153 __asm__ ("xgetbv" : "=a" (nXCRLow), "=d" (nXCRHigh) : "c" (0));
154 if( (nXCRLow & ( BIT_XMM_STATE | BIT_YMM_STATE )) !=
155 ( BIT_XMM_STATE | BIT_YMM_STATE ) )
156 {
157 return false;
158 }
159 CPL_IGNORE_RET_VAL(nXCRHigh); // unused
160
161 return true;
162 }
163
164 bool bCPLHasAVX = false;
165 static void CPLHaveRuntimeAVXInitialize() __attribute__ ((constructor));
CPLHaveRuntimeAVXInitialize()166 static void CPLHaveRuntimeAVXInitialize()
167 {
168 bCPLHasAVX = CPLDetectRuntimeAVX();
169 }
170
171 #elif defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && (defined(_M_IX86) || defined(_M_X64))
172 // _xgetbv available only in Visual Studio 2010 SP1 or later
173
CPLHaveRuntimeAVX()174 bool CPLHaveRuntimeAVX()
175 {
176 int cpuinfo[4] = { 0, 0, 0, 0 };
177 CPL_CPUID(1, cpuinfo);
178
179 // Check OSXSAVE feature.
180 if( (cpuinfo[REG_ECX] & (1 << CPUID_OSXSAVE_ECX_BIT)) == 0 )
181 {
182 return false;
183 }
184
185 // Check AVX feature.
186 if( (cpuinfo[REG_ECX] & (1 << CPUID_AVX_ECX_BIT)) == 0 )
187 {
188 return false;
189 }
190
191 // Issue XGETBV and check the XMM and YMM state bit.
192 unsigned __int64 xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
193 if( (xcrFeatureMask & ( BIT_XMM_STATE | BIT_YMM_STATE )) !=
194 ( BIT_XMM_STATE | BIT_YMM_STATE ) )
195 {
196 return false;
197 }
198
199 return true;
200 }
201
202 #else
203
/**
 * Fallback used when neither the GCC nor the Visual C++ detection code is
 * compiled in: AVX is always reported as unavailable.
 *
 * @return false, unconditionally.
 */
bool CPLHaveRuntimeAVX()
{
    return false;
}
208
209 #endif
210
#endif // defined(HAVE_AVX_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX)
212
213 //! @endcond
214