1 /******************************************************************************
2  *
3  * Project:  CPL - Common Portability Library
4  * Purpose:  CPU features detection
5  * Author:   Even Rouault, <even dot rouault at spatialys dot com>
6  *
7  ******************************************************************************
8  * Copyright (c) 2016, Even Rouault <even dot rouault at spatialys dot com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  ****************************************************************************/
28 
29 #include "cpl_port.h"
30 #include "cpl_string.h"
31 #include "cpl_cpu_features.h"
32 
33 CPL_CVSID("$Id: cpl_cpu_features.cpp 044a11ee4d018f526644c342c3f483ddd1e6d688 2020-12-15 20:56:11 +0100 Even Rouault $")
34 
35 //! @cond Doxygen_Suppress
36 
// Bit positions tested in the ECX register returned by CPUID level 1.
#define CPUID_SSSE3_ECX_BIT     9
#define CPUID_OSXSAVE_ECX_BIT   27
#define CPUID_AVX_ECX_BIT       28

// Bit position tested in the EDX register returned by CPUID level 1.
#define CPUID_SSE_EDX_BIT       25

// Masks applied to the low 32 bits returned by XGETBV: bit 1 is the XMM
// state flag and bit 2 is the YMM state flag.
#define BIT_XMM_STATE           (1 << 1)
#define BIT_YMM_STATE           (1 << 2)

// Slots of the 4-int array filled by CPL_CPUID(), in EAX/EBX/ECX/EDX order.
#define REG_EAX                 0
#define REG_EBX                 1
#define REG_ECX                 2
#define REG_EDX                 3
50 
// CPL_CPUID(level, array): runs the CPUID instruction for the requested level
// and stores EAX, EBX, ECX and EDX into array[0..3] (an array of 4 int).
// The macro is left undefined for compilers/targets not handled below.
#if defined(__GNUC__)

// Instruction used to swap the (R|E)BX register with the operand that
// receives the EBX output. It is issued once before and once after CPUID so
// that (R|E)BX holds its original value again when the asm statement ends.
#if defined(__x86_64)
#define CPL_CPUID_SWAP_BX "xchgq %%rbx, %q1"
#else
#define CPL_CPUID_SWAP_BX "xchgl %%ebx, %1"
#endif

#define GCC_CPUID(level, a, b, c, d)                  \
    __asm__ (CPL_CPUID_SWAP_BX "\n"                   \
             "cpuid\n"                                \
             CPL_CPUID_SWAP_BX                        \
             : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
             : "0" (level))

#define CPL_CPUID(level, array) \
    GCC_CPUID(level, array[0], array[1], array[2], array[3])

#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))

// Visual C++ provides CPUID through the __cpuid() intrinsic.
#include <intrin.h>
#define CPL_CPUID(level, array) __cpuid(array, level)

#endif
76 
77 #if defined(HAVE_SSE_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSE)
78 
79 /************************************************************************/
80 /*                          CPLHaveRuntimeSSE()                         */
81 /************************************************************************/
82 
CPLHaveRuntimeSSE()83 bool CPLHaveRuntimeSSE()
84 {
85     int cpuinfo[4] = { 0, 0, 0, 0 };
86     CPL_CPUID(1, cpuinfo);
87     return (cpuinfo[REG_EDX] & (1 << CPUID_SSE_EDX_BIT)) != 0;
88 }
89 
90 #endif
91 
92 #if defined(HAVE_SSSE3_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSSE3)
93 
94 /************************************************************************/
95 /*                         CPLHaveRuntimeSSSE3()                        */
96 /************************************************************************/
97 
CPLDetectSSSE3()98 static inline bool CPLDetectSSSE3()
99 {
100     int cpuinfo[4] = { 0, 0, 0, 0 };
101     CPL_CPUID(1, cpuinfo);
102     return (cpuinfo[REG_ECX] & (1 << CPUID_SSSE3_ECX_BIT)) != 0;
103 }
104 
105 #if defined(__GNUC__) && !defined(DEBUG)
106 bool bCPLHasSSSE3 = false;
107 static void CPLHaveRuntimeSSSE3Initialize() __attribute__ ((constructor));
CPLHaveRuntimeSSSE3Initialize()108 static void CPLHaveRuntimeSSSE3Initialize()
109 {
110     bCPLHasSSSE3 = CPLDetectSSSE3();
111 }
112 #else
CPLHaveRuntimeSSSE3()113 bool CPLHaveRuntimeSSSE3()
114 {
115 #ifdef DEBUG
116     if( !CPLTestBool(CPLGetConfigOption("GDAL_USE_SSSE3", "YES")) )
117         return false;
118 #endif
119     return CPLDetectSSSE3();
120 }
121 #endif
122 
123 #endif // defined(HAVE_SSSE3_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSSE3)
124 
125 #if defined(HAVE_AVX_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX)
126 
127 /************************************************************************/
128 /*                          CPLHaveRuntimeAVX()                         */
129 /************************************************************************/
130 
131 #if defined(__GNUC__)
132 
CPLDetectRuntimeAVX()133 static bool CPLDetectRuntimeAVX()
134 {
135     int cpuinfo[4] = { 0, 0, 0, 0 };
136     CPL_CPUID(1, cpuinfo);
137 
138     // Check OSXSAVE feature.
139     if( (cpuinfo[REG_ECX] & (1 << CPUID_OSXSAVE_ECX_BIT)) == 0 )
140     {
141         return false;
142     }
143 
144     // Check AVX feature.
145     if( (cpuinfo[REG_ECX] & (1 << CPUID_AVX_ECX_BIT)) == 0 )
146     {
147         return false;
148     }
149 
150     // Issue XGETBV and check the XMM and YMM state bit.
151     unsigned int nXCRLow;
152     unsigned int nXCRHigh;
153     __asm__ ("xgetbv" : "=a" (nXCRLow), "=d" (nXCRHigh) : "c" (0));
154     if( (nXCRLow & ( BIT_XMM_STATE | BIT_YMM_STATE )) !=
155                 ( BIT_XMM_STATE | BIT_YMM_STATE ) )
156     {
157         return false;
158     }
159     CPL_IGNORE_RET_VAL(nXCRHigh); // unused
160 
161     return true;
162 }
163 
164 bool bCPLHasAVX = false;
165 static void CPLHaveRuntimeAVXInitialize() __attribute__ ((constructor));
CPLHaveRuntimeAVXInitialize()166 static void CPLHaveRuntimeAVXInitialize()
167 {
168     bCPLHasAVX = CPLDetectRuntimeAVX();
169 }
170 
171 #elif defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && (defined(_M_IX86) || defined(_M_X64))
172 // _xgetbv available only in Visual Studio 2010 SP1 or later
173 
CPLHaveRuntimeAVX()174 bool CPLHaveRuntimeAVX()
175 {
176     int cpuinfo[4] = { 0, 0, 0, 0 };
177     CPL_CPUID(1, cpuinfo);
178 
179     // Check OSXSAVE feature.
180     if( (cpuinfo[REG_ECX] & (1 << CPUID_OSXSAVE_ECX_BIT)) == 0 )
181     {
182         return false;
183     }
184 
185     // Check AVX feature.
186     if( (cpuinfo[REG_ECX] & (1 << CPUID_AVX_ECX_BIT)) == 0 )
187     {
188         return false;
189     }
190 
191     // Issue XGETBV and check the XMM and YMM state bit.
192     unsigned __int64 xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
193     if( (xcrFeatureMask & ( BIT_XMM_STATE | BIT_YMM_STATE )) !=
194                           ( BIT_XMM_STATE | BIT_YMM_STATE ) )
195     {
196         return false;
197     }
198 
199     return true;
200 }
201 
202 #else
203 
// Fallback used when neither the GCC nor the Visual C++ implementation above
// is compiled in: no detection is attempted and AVX is reported as
// unavailable.
bool CPLHaveRuntimeAVX()
{
    const bool bAVXAvailable = false;
    return bAVXAvailable;
}
208 
209 #endif
210 
#endif // defined(HAVE_AVX_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX)
212 
213 //! @endcond
214