1 // [AsmJit]
2 // Complete JIT Assembler for C++ Language.
3 //
4 // [License]
5 // Zlib - See COPYING file in this package.
6 
7 #define ASMJIT_EXPORTS
8 
9 // [Dependencies - AsmJit]
#include "../x86/x86cpuinfo.h"
#include "../x86/x86defs.h"

// [Dependencies - C]
#include <string.h>
12 
13 // 2009-02-05: Thanks to Mike Tajmajer for VC7.1 compiler support. It shouldn't
14 // affect x64 compilation, because x64 compiler starts with VS2005 (VC8.0).
15 #if defined(_MSC_VER)
16 # if _MSC_VER >= 1400
17 #  include <intrin.h>
18 # endif // _MSC_VER >= 1400 (>= VS2005)
19 #endif // _MSC_VER
20 
21 // [Api-Begin]
22 #include "../core/apibegin.h"
23 
24 namespace AsmJit {
25 
26 // ============================================================================
27 // [AsmJit::X86CpuVendor]
28 // ============================================================================
29 
// One row of the vendor lookup table: a vendor id together with the vendor
// signature it is matched against.
struct X86CpuVendor
{
  // Vendor id; x86CpuDetect() copies it into `_vendorId` when `text` matches.
  uint32_t id;
  // Vendor signature. Every table entry fills all 12 bytes, so there is no
  // room for (and no guarantee of) a null terminator.
  char text[12];
};
35 
// Known vendor signatures and the vendor id each one maps to. Several
// signatures may share a single id. x86CpuDetect() compares these against the
// 12 bytes it assembles from the EBX, EDX and ECX results of CPUID leaf 0.
static const X86CpuVendor x86CpuVendor[] =
{
  { kCpuIntel    , { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' } },

  { kCpuAmd      , { 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' } },
  { kCpuAmd      , { 'A', 'M', 'D', 'i', 's', 'b', 'e', 't', 't', 'e', 'r', '!' } },

  { kCpuNSM      , { 'G', 'e', 'o', 'd', 'e', ' ', 'b', 'y', ' ', 'N', 'S', 'C' } },
  { kCpuNSM      , { 'C', 'y', 'r', 'i', 'x', 'I', 'n', 's', 't', 'e', 'a', 'd' } },

  { kCpuTransmeta, { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'T', 'M', 'x', '8', '6' } },
  { kCpuTransmeta, { 'T', 'r', 'a', 'n', 's', 'm', 'e', 't', 'a', 'C', 'P', 'U' } },

  // NOTE(review): this entry separates the three "VIA" groups with NUL bytes;
  // public CPUID vendor lists show this signature with spaces instead -
  // TODO confirm against real hardware.
  { kCpuVia      , { 'V', 'I', 'A',  0 , 'V', 'I', 'A',  0 , 'V', 'I', 'A',  0  } },
  { kCpuVia      , { 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' } }
};
52 
x86CpuVendorEq(const X86CpuVendor & info,const char * vendorString)53 static inline bool x86CpuVendorEq(const X86CpuVendor& info, const char* vendorString)
54 {
55   const uint32_t* a = reinterpret_cast<const uint32_t*>(info.text);
56   const uint32_t* b = reinterpret_cast<const uint32_t*>(vendorString);
57 
58   return (a[0] == b[0]) &
59          (a[1] == b[1]) &
60          (a[2] == b[2]) ;
61 }
62 
63 // ============================================================================
64 // [AsmJit::x86CpuSimplifyBrandString]
65 // ============================================================================
66 
// Compacts the null terminated brand string `s` in place.
//
// A space is dropped when the previously kept character is '@', or when the
// character that follows it is another space or '@'. Every other character is
// kept. Each source byte is overwritten with '\0' as soon as it has been read,
// so no stale characters remain behind the new terminator.
static inline void x86CpuSimplifyBrandString(char* s)
{
  char* out = s;   // Where the next kept character is written.
  char last = 0;   // Most recently kept character (0 if none yet).

  // Fetch the first character, then wipe the slot it came from.
  char ch = *s;
  *s = '\0';

  while (ch != 0)
  {
    // `s[1]` is inspected only for spaces, and only when `last` is not '@'.
    const bool drop = (ch == ' ') &&
                      (last == '@' || s[1] == ' ' || s[1] == '@');

    if (!drop)
    {
      *out++ = ch;
      last = ch;
    }

    // Advance to the next source character and wipe its slot.
    ch = *++s;
    *s = '\0';
  }

  *out = '\0';
}
98 
99 // ============================================================================
100 // [AsmJit::x86CpuId]
101 // ============================================================================
102 
// This is messy, I know. CPUID is available as an intrinsic since VS2005, but
// other compilers have to be supported as well. The main problem is that MS
// compilers do not allow inline assembler in 64-bit mode, so both an
// intrinsic-based version and an inline-asm version are needed.
107 
// x86CpuId() and x86CpuDetect() for x86 and x64 platforms begin here.
109 #if defined(ASMJIT_X86) || defined(ASMJIT_X64)
x86CpuId(uint32_t in,X86CpuId * out)110 void x86CpuId(uint32_t in, X86CpuId* out)
111 {
112 #if defined(_MSC_VER)
113 
114 // 2009-02-05: Thanks to Mike Tajmajer for supporting VC7.1 compiler.
115 // ASMJIT_X64 is here only for readibility, only VS2005 can compile 64-bit code.
116 # if _MSC_VER >= 1400 || defined(ASMJIT_X64)
117   // Done by intrinsics.
118   __cpuid(reinterpret_cast<int*>(out->i), in);
119 # else // _MSC_VER < 1400
120   uint32_t cpuid_in = in;
121   uint32_t* cpuid_out = out->i;
122 
123   __asm
124   {
125     mov     eax, cpuid_in
126     mov     edi, cpuid_out
127     cpuid
128     mov     dword ptr[edi +  0], eax
129     mov     dword ptr[edi +  4], ebx
130     mov     dword ptr[edi +  8], ecx
131     mov     dword ptr[edi + 12], edx
132   }
133 # endif // _MSC_VER < 1400
134 
135 #elif defined(__GNUC__)
136 
137 // Note, need to preserve ebx/rbx register!
138 # if defined(ASMJIT_X86)
139 #  define __myCpuId(a, b, c, d, inp) \
140   asm ("mov %%ebx, %%edi\n"    \
141        "cpuid\n"               \
142        "xchg %%edi, %%ebx\n"   \
143        : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
144 # else
145 #  define __myCpuId(a, b, c, d, inp) \
146   asm ("mov %%rbx, %%rdi\n"    \
147        "cpuid\n"               \
148        "xchg %%rdi, %%rbx\n"   \
149        : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
150 # endif
151   __myCpuId(out->eax, out->ebx, out->ecx, out->edx, in);
152 
153 #endif // Compiler #ifdef.
154 }
155 
156 // ============================================================================
157 // [AsmJit::x86CpuDetect]
158 // ============================================================================
159 
x86CpuDetect(X86CpuInfo * out)160 void x86CpuDetect(X86CpuInfo* out)
161 {
162   uint32_t i;
163   X86CpuId regs;
164 
165   // Clear everything except the '_size' member.
166   memset(reinterpret_cast<uint8_t*>(out) + sizeof(uint32_t),
167     0, sizeof(CpuInfo) - sizeof(uint32_t));
168 
169   // Fill safe defaults.
170   memcpy(out->_vendorString, "Unknown", 8);
171   out->_numberOfProcessors = CpuInfo::detectNumberOfProcessors();
172 
173   // Get vendor string/id.
174   x86CpuId(0, &regs);
175 
176   memcpy(out->_vendorString, &regs.ebx, 4);
177   memcpy(out->_vendorString + 4, &regs.edx, 4);
178   memcpy(out->_vendorString + 8, &regs.ecx, 4);
179 
180   for (i = 0; i < 3; i++)
181   {
182     if (x86CpuVendorEq(x86CpuVendor[i], out->_vendorString))
183     {
184       out->_vendorId = x86CpuVendor[i].id;
185       break;
186     }
187   }
188 
189   // Get feature flags in ecx/edx, and family/model in eax.
190   x86CpuId(1, &regs);
191 
192   // Fill family and model fields.
193   out->_family   = (regs.eax >> 8) & 0x0F;
194   out->_model    = (regs.eax >> 4) & 0x0F;
195   out->_stepping = (regs.eax     ) & 0x0F;
196 
197   // Use extended family and model fields.
198   if (out->_family == 0x0F)
199   {
200     out->_family += ((regs.eax >> 20) & 0xFF);
201     out->_model  += ((regs.eax >> 16) & 0x0F) << 4;
202   }
203 
204   out->_processorType        = ((regs.eax >> 12) & 0x03);
205   out->_brandIndex           = ((regs.ebx      ) & 0xFF);
206   out->_flushCacheLineSize   = ((regs.ebx >>  8) & 0xFF) * 8;
207   out->_maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
208   out->_apicPhysicalId       = ((regs.ebx >> 24) & 0xFF);
209 
210   if (regs.ecx & 0x00000001U) out->_features |= kX86FeatureSse3;
211   if (regs.ecx & 0x00000002U) out->_features |= kX86FeaturePclMulDQ;
212   if (regs.ecx & 0x00000008U) out->_features |= kX86FeatureMonitorMWait;
213   if (regs.ecx & 0x00000200U) out->_features |= kX86FeatureSsse3;
214   if (regs.ecx & 0x00002000U) out->_features |= kX86FeatureCmpXchg16B;
215   if (regs.ecx & 0x00080000U) out->_features |= kX86FeatureSse41;
216   if (regs.ecx & 0x00100000U) out->_features |= kX86FeatureSse42;
217   if (regs.ecx & 0x00400000U) out->_features |= kX86FeatureMovBE;
218   if (regs.ecx & 0x00800000U) out->_features |= kX86FeaturePopCnt;
219   if (regs.ecx & 0x10000000U) out->_features |= kX86FeatureAvx;
220 
221   if (regs.edx & 0x00000010U) out->_features |= kX86FeatureRdtsc;
222   if (regs.edx & 0x00000100U) out->_features |= kX86FeatureCmpXchg8B;
223   if (regs.edx & 0x00008000U) out->_features |= kX86FeatureCMov;
224   if (regs.edx & 0x00800000U) out->_features |= kX86FeatureMmx;
225   if (regs.edx & 0x01000000U) out->_features |= kX86FeatureFXSR;
226   if (regs.edx & 0x02000000U) out->_features |= kX86FeatureSse | kX86FeatureMmxExt;
227   if (regs.edx & 0x04000000U) out->_features |= kX86FeatureSse | kX86FeatureSse2;
228   if (regs.edx & 0x10000000U) out->_features |= kX86FeatureMultiThreading;
229 
230   if (out->_vendorId == kCpuAmd && (regs.edx & 0x10000000U))
231   {
232     // AMD sets Multithreading to ON if it has more cores.
233     if (out->_numberOfProcessors == 1) out->_numberOfProcessors = 2;
234   }
235 
236   // This comment comes from V8 and I think that its important:
237   //
238   // Opteron Rev E has i bug in which on very rare occasions i locked
239   // instruction doesn't act as i read-acquire barrier if followed by i
240   // non-locked read-modify-write instruction.  Rev F has this bug in
241   // pre-release versions, but not in versions released to customers,
242   // so we test only for Rev E, which is family 15, model 32..63 inclusive.
243 
244   if (out->_vendorId == kCpuAmd && out->_family == 15 && out->_model >= 32 && out->_model <= 63)
245   {
246     out->_bugs |= kX86BugAmdLockMB;
247   }
248 
249   // Calling cpuid with 0x80000000 as the in argument
250   // gets the number of valid extended IDs.
251 
252   x86CpuId(0x80000000, &regs);
253 
254   uint32_t exIds = regs.eax;
255   if (exIds > 0x80000004) exIds = 0x80000004;
256 
257   uint32_t* brand = reinterpret_cast<uint32_t*>(out->_brandString);
258 
259   for (i = 0x80000001; i <= exIds; i++)
260   {
261     x86CpuId(i, &regs);
262 
263     switch (i)
264     {
265       case 0x80000001:
266         if (regs.ecx & 0x00000001U) out->_features |= kX86FeatureLahfSahf;
267         if (regs.ecx & 0x00000020U) out->_features |= kX86FeatureLzCnt;
268         if (regs.ecx & 0x00000040U) out->_features |= kX86FeatureSse4A;
269         if (regs.ecx & 0x00000080U) out->_features |= kX86FeatureMSse;
270         if (regs.ecx & 0x00000100U) out->_features |= kX86FeaturePrefetch;
271 
272         if (regs.edx & 0x00100000U) out->_features |= kX86FeatureExecuteDisableBit;
273         if (regs.edx & 0x00200000U) out->_features |= kX86FeatureFFXSR;
274         if (regs.edx & 0x00400000U) out->_features |= kX86FeatureMmxExt;
275         if (regs.edx & 0x08000000U) out->_features |= kX86FeatureRdtscP;
276         if (regs.edx & 0x20000000U) out->_features |= kX86Feature64Bit;
277         if (regs.edx & 0x40000000U) out->_features |= kX86Feature3dNowExt | kX86FeatureMmxExt;
278         if (regs.edx & 0x80000000U) out->_features |= kX86Feature3dNow;
279         break;
280 
281       case 0x80000002:
282       case 0x80000003:
283       case 0x80000004:
284         *brand++ = regs.eax;
285         *brand++ = regs.ebx;
286         *brand++ = regs.ecx;
287         *brand++ = regs.edx;
288         break;
289 
290       default:
291         // Additional features can be detected in the future.
292         break;
293     }
294   }
295 
296   // Simplify the brand string (remove unnecessary spaces to make it printable).
297   x86CpuSimplifyBrandString(out->_brandString);
298 }
299 #endif
300 
301 } // AsmJit
302 
303 // [Api-End]
304 #include "../core/apiend.h"
305