1 /********************************************************************************
2 *                                                                               *
3 *                              C P U I D   S u p p o r t                        *
4 *                                                                               *
5 *********************************************************************************
6 * Copyright (C) 1998,2021 by Jeroen van der Zijp.   All Rights Reserved.        *
7 *********************************************************************************
8 * This library is free software; you can redistribute it and/or modify          *
9 * it under the terms of the GNU Lesser General Public License as published by   *
10 * the Free Software Foundation; either version 3 of the License, or             *
11 * (at your option) any later version.                                           *
12 *                                                                               *
13 * This library is distributed in the hope that it will be useful,               *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of                *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                 *
16 * GNU Lesser General Public License for more details.                           *
17 *                                                                               *
18 * You should have received a copy of the GNU Lesser General Public License      *
19 * along with this program.  If not, see <http://www.gnu.org/licenses/>          *
20 ********************************************************************************/
#include "xincs.h"
#include "fxver.h"
#include "fxdefs.h"
#include "fxmath.h"
#include "fxcpuid.h"
#if defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219) && (defined(_M_IX86) || defined(_M_X64))
#include <immintrin.h>
#endif
26 
27 
28 /*
29   Notes:
30   - Obtain processor capabilities at runtime.
  - Utility APIs to discover CPU vendor and CPU instruction-set extensions.
32   - Only supported on x86 and x86-64 cpus.
33   - Consult AMD and Intel Programming Manuals for details.
34 */
35 
36 
37 using namespace FX;
38 
39 /*******************************************************************************/
40 
41 namespace FX {
42 
43 
44 // Return number of levels of CPUID feature-requests supported
fxCPUCaps(FXuint level)45 FXuint fxCPUCaps(FXuint level){
46 #if defined(WIN32) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_VER >= 1500)
47   FXint features[4];
48   level&=0x80000000;
49   __cpuid(features,level);
50   return features[0]+1;
51 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__i686__))
52   FXuint eax,ebx,ecx,edx;
53   level&=0x80000000;
54   __asm__ __volatile__("xchgl %%ebx, %1 \n\t"  \
55                        "cpuid           \n\t"  \
56                        "xchgl %%ebx, %1 \n\t"  : "=a"(eax), "=r"(ebx), "=c"(ecx), "=d"(edx) : "0" (level) : "cc");
57   return eax+1;
58 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__x86_64__))
59   FXuint eax,ebx,ecx,edx;
60   level&=0x80000000;
61   __asm__ __volatile__("cpuid \n\t" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "0" (level) : "cc");
62   return eax+1;
63 #endif
64   return 0;
65   }
66 
67 
68 // Get CPU info
fxCPUGetCaps(FXuint level,FXuint features[])69 FXbool fxCPUGetCaps(FXuint level,FXuint features[]){
70 #if defined(WIN32) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_VER >= 1500)
71   if(level<fxCPUCaps(level)){
72     __cpuid((int*)features,level);
73     return true;
74     }
75 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__i686__))
76   if(level<fxCPUCaps(level)){
77   __asm__ __volatile__("xchgl %%ebx, %1 \n\t"  \
78                        "cpuid           \n\t"  \
79                        "xchgl %%ebx, %1 \n\t"  : "=a"(features[0]), "=r"(features[1]), "=c"(features[2]), "=d"(features[3]) : "0" (level) : "cc");
80     return true;
81     }
82 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__x86_64__))
83   if(level<fxCPUCaps(level)){
84     __asm__ __volatile__("cpuid \n\t" : "=a"(features[0]), "=b"(features[1]), "=c"(features[2]), "=d"(features[3]) : "0" (level) : "cc");
85     return true;
86     }
87 #endif
88   return false;
89   }
90 
91 
92 // Get CPU info
fxCPUGetXCaps(FXuint level,FXuint count,FXuint features[])93 FXbool fxCPUGetXCaps(FXuint level,FXuint count,FXuint features[]){
94 #if defined(WIN32) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_VER >= 1500)
95   if(level<fxCPUCaps(level)){
96    __cpuidex((int*)features,level,count);
97     return true;
98     }
99 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__i686__))
100   if(level<fxCPUCaps(level)){
101   __asm__ __volatile__("xchgl %%ebx, %1 \n\t"   \
102                        "cpuid           \n\t"   \
103                        "xchgl %%ebx, %1 \n\t" : "=a"(features[0]), "=r"(features[1]), "=c"(features[2]), "=d"(features[3]) : "0"(level), "2"(count) : "cc");
104     return true;
105     }
106 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__x86_64__))
107   if(level<fxCPUCaps(level)){
108     __asm__ __volatile__("cpuid \n\t" : "=a"(features[0]), "=b"(features[1]), "=c"(features[2]), "=d"(features[3]) : "0"(level), "2"(count) : "cc");
109     return true;
110     }
111 #endif
112   return false;
113   }
114 
115 
116 // Return exciting features
fxCPUFeatures()117 FXuint fxCPUFeatures(){
118   FXuint features[4];
119   if(fxCPUGetCaps(1,features)){
120     FXuint blank=(CPU_HAS_AVX|CPU_HAS_AVX2|CPU_HAS_FMA|CPU_HAS_FMA4|CPU_HAS_XOP);
121     FXuint caps=0;
122     if(FXBIT(features[2],0)) caps|=CPU_HAS_SSE3;
123     if(FXBIT(features[3],8)) caps|=CPU_HAS_CX8;
124     if(FXBIT(features[2],9)) caps|=CPU_HAS_SSSE3;
125     if(FXBIT(features[2],12)) caps|=CPU_HAS_FMA;
126     if(FXBIT(features[2],13)) caps|=CPU_HAS_CX16;
127     if(FXBIT(features[2],19)) caps|=CPU_HAS_SSE41;
128     if(FXBIT(features[2],20)) caps|=CPU_HAS_SSE42;
129     if(FXBIT(features[2],23)) caps|=CPU_HAS_POPCNT;
130     if(FXBIT(features[2],25)) caps|=CPU_HAS_AES;
131     if(FXBIT(features[2],28)) caps|=CPU_HAS_AVX;
132     if(FXBIT(features[2],29)) caps|=CPU_HAS_F16;        // Half-floats
133     if(FXBIT(features[2],30)) caps|=CPU_HAS_RAND;
134     if(FXBIT(features[3],25)) caps|=CPU_HAS_SSE;
135     if(FXBIT(features[3],26)) caps|=CPU_HAS_SSE2;
136     if(FXBIT(features[2],27)){                          // OSXSAVE
137 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && (defined(__i686__) || defined(__x86_64__)))
138       FXuint lo,hi;
139       __asm__ __volatile__(".byte 0x0f,0x01,0xd0" : "=a" (lo), "=d" (hi) : "c" (0));    // XGETBV ecx=0
140       if((lo&6)==6) blank=0;                            // Don't blank out AVX, AVX2, FMA, FMA4, XOP later
141 #endif
142 // _xgetbv(0); // For _MSC_VER
143       }
144     if(fxCPUGetXCaps(7,0,features)){
145       if(FXBIT(features[1],3)) caps|=CPU_HAS_BMI1;
146       if(FXBIT(features[1],5)) caps|=CPU_HAS_AVX2;
147       if(FXBIT(features[1],8)) caps|=CPU_HAS_BMI2;
148       }
149     if(fxCPUGetCaps(0,features) && (features[1]==0x68747541) && (features[2]==0x444d4163) && (features[3]==0x69746e65)){
150       if(fxCPUGetCaps(0x80000001,features)){
151         if(FXBIT(features[2],6)) caps|=CPU_HAS_SSE4A;
152         if(FXBIT(features[2],5)) caps|=CPU_HAS_ABM;
153         if(FXBIT(features[2],11)) caps|=CPU_HAS_XOP;
154         if(FXBIT(features[2],16)) caps|=CPU_HAS_FMA4;
155         if(FXBIT(features[2],21)) caps|=CPU_HAS_TBM;
156         }
157       }
158     caps&=~blank;
159     return caps;
160     }
161   return 0;
162   }
163 
164 
165 // Return CPU Identification.
fxCPUName(FXchar name[])166 FXbool fxCPUName(FXchar name[]){
167   FXuint features[4];
168   if(fxCPUGetCaps(0,features)){
169     name[0]=((char*)features)[4];
170     name[1]=((char*)features)[5];
171     name[2]=((char*)features)[6];
172     name[3]=((char*)features)[7];
173     name[4]=((char*)features)[12];
174     name[5]=((char*)features)[13];
175     name[6]=((char*)features)[14];
176     name[7]=((char*)features)[15];
177     name[8]=((char*)features)[8];
178     name[9]=((char*)features)[9];
179     name[10]=((char*)features)[10];
180     name[11]=((char*)features)[11];
181     name[12]='\0';
182     return true;
183     }
184   name[0]='\0';
185   return false;
186   }
187 
188 }
189