1 /********************************************************************************
2 * *
3 * C P U I D S u p p o r t *
4 * *
5 *********************************************************************************
6 * Copyright (C) 1998,2021 by Jeroen van der Zijp. All Rights Reserved. *
7 *********************************************************************************
8 * This library is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU Lesser General Public License as published by *
10 * the Free Software Foundation; either version 3 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This library is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU Lesser General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU Lesser General Public License *
19 * along with this program. If not, see <http://www.gnu.org/licenses/> *
20 ********************************************************************************/
#include "xincs.h"
#include "fxver.h"
#include "fxdefs.h"
#include "fxmath.h"
#include "fxcpuid.h"

// _xgetbv() intrinsic, used by fxCPUFeatures(); provided by Visual Studio 2010 SP1 and later
#if defined(WIN32) && (defined(_M_IX86) || defined(_M_X64)) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h>
#endif
26
27
28 /*
29 Notes:
30 - Obtain processor capabilities at runtime.
  - Utility APIs to discover CPU vendor and CPU instruction-set extensions.
32 - Only supported on x86 and x86-64 cpus.
33 - Consult AMD and Intel Programming Manuals for details.
34 */
35
36
37 using namespace FX;
38
39 /*******************************************************************************/
40
41 namespace FX {
42
43
44 // Return number of levels of CPUID feature-requests supported
fxCPUCaps(FXuint level)45 FXuint fxCPUCaps(FXuint level){
46 #if defined(WIN32) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_VER >= 1500)
47 FXint features[4];
48 level&=0x80000000;
49 __cpuid(features,level);
50 return features[0]+1;
51 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__i686__))
52 FXuint eax,ebx,ecx,edx;
53 level&=0x80000000;
54 __asm__ __volatile__("xchgl %%ebx, %1 \n\t" \
55 "cpuid \n\t" \
56 "xchgl %%ebx, %1 \n\t" : "=a"(eax), "=r"(ebx), "=c"(ecx), "=d"(edx) : "0" (level) : "cc");
57 return eax+1;
58 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__x86_64__))
59 FXuint eax,ebx,ecx,edx;
60 level&=0x80000000;
61 __asm__ __volatile__("cpuid \n\t" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "0" (level) : "cc");
62 return eax+1;
63 #endif
64 return 0;
65 }
66
67
68 // Get CPU info
fxCPUGetCaps(FXuint level,FXuint features[])69 FXbool fxCPUGetCaps(FXuint level,FXuint features[]){
70 #if defined(WIN32) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_VER >= 1500)
71 if(level<fxCPUCaps(level)){
72 __cpuid((int*)features,level);
73 return true;
74 }
75 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__i686__))
76 if(level<fxCPUCaps(level)){
77 __asm__ __volatile__("xchgl %%ebx, %1 \n\t" \
78 "cpuid \n\t" \
79 "xchgl %%ebx, %1 \n\t" : "=a"(features[0]), "=r"(features[1]), "=c"(features[2]), "=d"(features[3]) : "0" (level) : "cc");
80 return true;
81 }
82 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__x86_64__))
83 if(level<fxCPUCaps(level)){
84 __asm__ __volatile__("cpuid \n\t" : "=a"(features[0]), "=b"(features[1]), "=c"(features[2]), "=d"(features[3]) : "0" (level) : "cc");
85 return true;
86 }
87 #endif
88 return false;
89 }
90
91
92 // Get CPU info
fxCPUGetXCaps(FXuint level,FXuint count,FXuint features[])93 FXbool fxCPUGetXCaps(FXuint level,FXuint count,FXuint features[]){
94 #if defined(WIN32) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_VER >= 1500)
95 if(level<fxCPUCaps(level)){
96 __cpuidex((int*)features,level,count);
97 return true;
98 }
99 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__i686__))
100 if(level<fxCPUCaps(level)){
101 __asm__ __volatile__("xchgl %%ebx, %1 \n\t" \
102 "cpuid \n\t" \
103 "xchgl %%ebx, %1 \n\t" : "=a"(features[0]), "=r"(features[1]), "=c"(features[2]), "=d"(features[3]) : "0"(level), "2"(count) : "cc");
104 return true;
105 }
106 #elif ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && defined(__x86_64__))
107 if(level<fxCPUCaps(level)){
108 __asm__ __volatile__("cpuid \n\t" : "=a"(features[0]), "=b"(features[1]), "=c"(features[2]), "=d"(features[3]) : "0"(level), "2"(count) : "cc");
109 return true;
110 }
111 #endif
112 return false;
113 }
114
115
116 // Return exciting features
fxCPUFeatures()117 FXuint fxCPUFeatures(){
118 FXuint features[4];
119 if(fxCPUGetCaps(1,features)){
120 FXuint blank=(CPU_HAS_AVX|CPU_HAS_AVX2|CPU_HAS_FMA|CPU_HAS_FMA4|CPU_HAS_XOP);
121 FXuint caps=0;
122 if(FXBIT(features[2],0)) caps|=CPU_HAS_SSE3;
123 if(FXBIT(features[3],8)) caps|=CPU_HAS_CX8;
124 if(FXBIT(features[2],9)) caps|=CPU_HAS_SSSE3;
125 if(FXBIT(features[2],12)) caps|=CPU_HAS_FMA;
126 if(FXBIT(features[2],13)) caps|=CPU_HAS_CX16;
127 if(FXBIT(features[2],19)) caps|=CPU_HAS_SSE41;
128 if(FXBIT(features[2],20)) caps|=CPU_HAS_SSE42;
129 if(FXBIT(features[2],23)) caps|=CPU_HAS_POPCNT;
130 if(FXBIT(features[2],25)) caps|=CPU_HAS_AES;
131 if(FXBIT(features[2],28)) caps|=CPU_HAS_AVX;
132 if(FXBIT(features[2],29)) caps|=CPU_HAS_F16; // Half-floats
133 if(FXBIT(features[2],30)) caps|=CPU_HAS_RAND;
134 if(FXBIT(features[3],25)) caps|=CPU_HAS_SSE;
135 if(FXBIT(features[3],26)) caps|=CPU_HAS_SSE2;
136 if(FXBIT(features[2],27)){ // OSXSAVE
137 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER)) && (defined(__i686__) || defined(__x86_64__)))
138 FXuint lo,hi;
139 __asm__ __volatile__(".byte 0x0f,0x01,0xd0" : "=a" (lo), "=d" (hi) : "c" (0)); // XGETBV ecx=0
140 if((lo&6)==6) blank=0; // Don't blank out AVX, AVX2, FMA, FMA4, XOP later
141 #endif
142 // _xgetbv(0); // For _MSC_VER
143 }
144 if(fxCPUGetXCaps(7,0,features)){
145 if(FXBIT(features[1],3)) caps|=CPU_HAS_BMI1;
146 if(FXBIT(features[1],5)) caps|=CPU_HAS_AVX2;
147 if(FXBIT(features[1],8)) caps|=CPU_HAS_BMI2;
148 }
149 if(fxCPUGetCaps(0,features) && (features[1]==0x68747541) && (features[2]==0x444d4163) && (features[3]==0x69746e65)){
150 if(fxCPUGetCaps(0x80000001,features)){
151 if(FXBIT(features[2],6)) caps|=CPU_HAS_SSE4A;
152 if(FXBIT(features[2],5)) caps|=CPU_HAS_ABM;
153 if(FXBIT(features[2],11)) caps|=CPU_HAS_XOP;
154 if(FXBIT(features[2],16)) caps|=CPU_HAS_FMA4;
155 if(FXBIT(features[2],21)) caps|=CPU_HAS_TBM;
156 }
157 }
158 caps&=~blank;
159 return caps;
160 }
161 return 0;
162 }
163
164
165 // Return CPU Identification.
fxCPUName(FXchar name[])166 FXbool fxCPUName(FXchar name[]){
167 FXuint features[4];
168 if(fxCPUGetCaps(0,features)){
169 name[0]=((char*)features)[4];
170 name[1]=((char*)features)[5];
171 name[2]=((char*)features)[6];
172 name[3]=((char*)features)[7];
173 name[4]=((char*)features)[12];
174 name[5]=((char*)features)[13];
175 name[6]=((char*)features)[14];
176 name[7]=((char*)features)[15];
177 name[8]=((char*)features)[8];
178 name[9]=((char*)features)[9];
179 name[10]=((char*)features)[10];
180 name[11]=((char*)features)[11];
181 name[12]='\0';
182 return true;
183 }
184 name[0]='\0';
185 return false;
186 }
187
188 }
189