/*
 * Copyright (c) 2007-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */


#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#if defined(TARGET_WIN_X8664)
# if defined(OBJ_WIN_X8664_IS_X86ID)
#  error object macro OBJ_WIN_X8664_IS_X86ID cannot already be defined
# else
#  define OBJ_WIN_X8664_IS_X86ID
# endif
#endif

#include "pgcpuid.h"
#include "x86id.h"

/*
 * Define some interesting fields in the extended control register[0].
 * xcr[0] only defines the lower 32-bits of the 64-bit register.
 */

#define xcr0_bit_XMM    0x00000002U
#define xcr0_bit_YMM    0x00000004U
#define xcr0_bit_ZMMK   0x00000020U
#define xcr0_bit_ZMMLO  0x00000040U
#define xcr0_bit_ZMMHI  0x00000080U

#define xcr0_mask_YMM   (xcr0_bit_XMM | xcr0_bit_YMM)
#define xcr0_mask_ZMM   (xcr0_bit_ZMMK | xcr0_bit_ZMMLO | xcr0_bit_ZMMHI)


#define signature_AMD_ebx   0x68747541
#define signature_AMD_ecx   0x444d4163
#define signature_AMD_edx   0x69746e65

#define signature_INTEL_ebx 0x756e6547
#define signature_INTEL_ecx 0x6c65746e
#define signature_INTEL_edx 0x49656e69

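/*
 * Illustrative sketch (comment only, not compiled): the vendor signature
 * constants above are simply the 12-byte CPUID(0) vendor string packed
 * little-endian into ebx/edx/ecx.  For example, "GenuineIntel" packs as:
 *
 *   ebx = 'G' | 'e'<<8 | 'n'<<16 | 'u'<<24  = 0x756e6547
 *   edx = 'i' | 'n'<<8 | 'e'<<16 | 'I'<<24  = 0x49656e69
 *   ecx = 'n' | 't'<<8 | 'e'<<16 | 'l'<<24  = 0x6c65746e
 *
 * and "AuthenticAMD" packs the same way into the signature_AMD_* values.
 */
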
//#define DEBUG
#if defined(DEBUG)
#include <string.h>
/* use DEBUG_PRINTF("format %s with any arguments %d but no endline",
 *                  string, integer );
 */
#define DEBUG_PRINTF(...) \
  do { fputs(__func__, stdout); \
       fputs(strlen(__func__) > 7 ? ":\t" : ":\t\t", stdout); \
       printf(__VA_ARGS__); \
       fputs("\n", stdout); } while (0)
#else
#define DEBUG_PRINTF(...)
#endif

/*
 * prototypes for the test functions here
 */
static int ia_cachesize(void);
static int ia_unifiedcache(void);
static int amd_cachesize(void);
static int ia_cores(void);
static int amd_cores(void);
static int is_xcr_set(uint32_t, uint64_t);
static int is_amd_family(uint32_t, uint32_t *);

/*
 * Various routines in the runtime libraries need to detect what processor
 * type/model/feature they are running on.  Instead of using relatively
 * heavyweight routines to recompute that information on every call, provide
 * a mechanism to cache the data.
 *
 * "X86IDFN(is_<TYPE/MODEL/FEATURE>)" is the routine that is called; it caches
 * its result in the global variable X86IDFN(is_<TYPE/MODEL/FEATURE>_cached).
 *
 * Use the macro IS_X86ID(<TYPE/MODEL/FEATURE>) to retrieve the cached value.
 */
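
/*
 * Illustrative usage sketch (comment only).  It assumes IS_X86ID() from
 * x86id.h returns the cached value when available and otherwise invokes the
 * corresponding is_*() routine; the dispatch paths named here are
 * hypothetical:
 *
 *   if (IS_X86ID(is_avx2)) {
 *     // dispatch to an AVX2 kernel
 *   } else if (IS_X86ID(is_sse42)) {
 *     // fall back to an SSE4.2 kernel
 *   }
 */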

uint32_t X86IDFN(hw_features) = 0;
int X86IDFN(is_intel_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_amd_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_ip6_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_sse_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_sse2_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_sse3_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_ssse3_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_sse4a_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_sse41_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_sse42_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_aes_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_avx_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_avx2_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_avx512_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_avx512f_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_avx512vl_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_fma_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_fma4_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_ht_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_athlon_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_hammer_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_gh_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_gh_a_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_gh_b_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_shanghai_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_istanbul_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_bulldozer_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_piledriver_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_k7_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_ia32e_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_p4_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_knl_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_x86_64_cached) = X86ID_IS_CACHED_UNDEF;
int X86IDFN(is_f16c_cached) = X86ID_IS_CACHED_UNDEF;

/*
 * Return whether extended control register has requested bits set.
 * Assumes that the processor has the xgetbv instruction.
 * Return: 0 == register does not have bit(s) set or __pgi_getbv() failed.
 *         1 == bits set.
 */

static
int is_xcr_set(uint32_t xcr_indx, uint64_t xcr_mask)
{
  uint64_t xcr;

  if( __pgi_getbv( xcr_indx, &xcr ) == 0 ) {
    DEBUG_PRINTF("__pgi_getbv() failed xcr_indx=%#8.8x, "
                 "xcr_mask=%#16.16lx", xcr_indx, xcr_mask);
    return 0;
  }

  DEBUG_PRINTF("xcr[%u]=%#16.16lx, xcr_mask=%#16.16lx",
               xcr_indx, xcr, xcr_mask);
  return (xcr & xcr_mask) == xcr_mask;
}

/*
 * cache values returned from __pgi_cpuid.
 * cpuid instructions on Windows are costly
 */
int
X86IDFN(idcache)(uint32_t f, uint32_t *r)
{
  int j, rv = 1;
  static struct {
    int set;
    uint32_t f;
    uint32_t i[4];
  } saved[] = {
    { 0, 0U,          { 0, 0, 0, 0}},
    { 0, 1U,          { 0, 0, 0, 0}},
    { 0, 2U,          { 0, 0, 0, 0}},
    { 0, 0x80000000U, { 0, 0, 0, 0}},
    { 0, 0x80000001U, { 0, 0, 0, 0}},
    { 0, 0x80000002U, { 0, 0, 0, 0}},
    { 0, 0x80000003U, { 0, 0, 0, 0}},
    { 0, 0x80000004U, { 0, 0, 0, 0}},
    { 0, 0x80000006U, { 0, 0, 0, 0}},
    { 0, 0x80000008U, { 0, 0, 0, 0}},
    {-1, 0U,          { 0, 0, 0, 0}} };
  for (j = 0; saved[j].set >= 0; ++j) {
    if (saved[j].f == f) {
      if (!saved[j].set) {
        /* call cpuid once, save its value */
        rv = __pgi_cpuid(f, saved[j].i);
        saved[j].set = 1;
      }
      /* return the saved value */
      r[0] = saved[j].i[0];
      r[1] = saved[j].i[1];
      r[2] = saved[j].i[2];
      r[3] = saved[j].i[3];
      break;
    } else if (saved[j].set == -1) {
      /* we're not caching this value */
      rv = __pgi_cpuid(f, r);
      break;
    }
  }
  return rv;
}
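
/*
 * Illustrative sketch (comment only): using the cached CPUID leaf 0 to
 * recover the 12-character vendor string.  The registers come back in r[]
 * in the order eax, ebx, ecx, edx, and the vendor text is ebx, edx, ecx:
 *
 *   uint32_t r[4];
 *   char vendor[13];
 *   if (X86IDFN(idcache)(0U, r)) {
 *     memcpy(vendor + 0, &r[1], 4);   // ebx
 *     memcpy(vendor + 4, &r[3], 4);   // edx
 *     memcpy(vendor + 8, &r[2], 4);   // ecx
 *     vendor[12] = '\0';              // "GenuineIntel" or "AuthenticAMD"
 *   }
 */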

/*
 * is_amd_family(uint32_t family, uint32_t *model)
 * Return true if processor is AMD and of specific family.
 * Always return model.
 */

static
int is_amd_family(uint32_t family, uint32_t *model)
{
  ACPU1 c1;

  if ((X86IDFN(is_amd)() == 0) || (X86IDFN(idcache)( 1, c1.i ) == 0)) {
    return 0;
  }

  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  *model = c1.u.eax.model;
  return ( c1.u.eax.family == family);
}

/*
 * Check that this is a Genuine Intel processor
 */
int
X86IDFN(is_intel)(void)
// is_intel: eax 0x00000014 ebx 0x756e6547 ecx 0x6c65746e edx 0x49656e69
// is_intel: eax 0x00000014 ebx 0x756e6547 ecx 0x49656e69 edx 0x6c65746e
{
  unsigned int h;
  CPU0 c0;
  X86IDFN(idcache)( 0, c0.i );
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c0.i[0], c0.i[1], c0.i[2], c0.i[3] );
  X86IDFN(is_intel_cached) =
    ((signature_INTEL_ebx ^ c0.i[1]) |
     (signature_INTEL_ecx ^ c0.i[2]) |
     (signature_INTEL_edx ^ c0.i[3])) == 0;
  return X86IDFN(is_intel_cached);
}/* is_intel */

/*
 * Check that this is an Authentic AMD processor
 */
int
X86IDFN(is_amd)(void)
{
  CPU0 c0;
  unsigned int h;
  X86IDFN(idcache)( 0, c0.i );
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c0.i[0], c0.i[1], c0.i[2], c0.i[3] );
  X86IDFN(is_amd_cached) =
    ((signature_AMD_ebx ^ c0.i[1]) |
     (signature_AMD_ecx ^ c0.i[2]) |
     (signature_AMD_edx ^ c0.i[3])) == 0;
  return X86IDFN(is_amd_cached);
}/* is_amd */

/*
 * test(p6)
 * either manufacturer
 * cpuid(1) returns the fpu and cmov flags; if both are set, this must be at
 * least a p6
 */
int
X86IDFN(is_ip6)(void)
{
  ICPU1 c1;

  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_ip6_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );

  return X86IDFN(is_ip6_cached) = ( c1.u.edx.fpu && c1.u.edx.cmov );
}/* is_ip6 */

/*
 * test(sse)
 * call with either AMD or Intel
 * test sse bit, same bit for either manufacturer
 */
int
X86IDFN(is_sse)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_sse_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_sse_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_sse_cached) = ( c1.u.edx.sse != 0);
}/* is_sse */

/*
 * test(sse2)
 * call with either AMD or Intel
 * test sse2 bit, same bit for either manufacturer
 */
int
X86IDFN(is_sse2)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_sse2_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_sse2_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_sse2_cached) = ( c1.u.edx.sse2 != 0);
}/* is_sse2 */

/*
 * test(sse3)
 * call with either AMD or Intel
 */
int
X86IDFN(is_sse3)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_sse3_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_sse3_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_sse3_cached) = ( c1.u.ecx.sse3 != 0);
}/* is_sse3 */

/*
 * test(ssse3)
 * call with either AMD or Intel
 * test ssse3 bit, same bit for either manufacturer
 */
int
X86IDFN(is_ssse3)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_ssse3_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_ssse3_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_ssse3_cached) = ( c1.u.ecx.ssse3 != 0);
}/* is_ssse3 */

/*
 * test(sse4a)
 * right now, it's just the greyhound check
 */
int
X86IDFN(is_sse4a)(void)
{
  CPU80 c80;
  ACPU81 c81;
  if( !X86IDFN(is_amd)() )
    return X86IDFN(is_sse4a_cached) = 0;
  if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
    return X86IDFN(is_sse4a_cached) = 0;
  if( c80.b.largest < 0x80000001 )
    return X86IDFN(is_sse4a_cached) = 0;
  if( X86IDFN(idcache)( 0x80000001, c81.i ) == 0 )
    return X86IDFN(is_sse4a_cached) = 0;
  return X86IDFN(is_sse4a_cached) = ( c81.u.ecx.sse4a != 0);
}/* is_sse4a */

/*
 * test(sse41)
 * right now, it's just the penryn check
 */
int
X86IDFN(is_sse41)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() )
    return X86IDFN(is_sse41_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_sse41_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_sse41_cached) = ( c1.u.ecx.sse41 != 0);
}/* is_sse41 */

/*
 * test(sse42)
 */
int
X86IDFN(is_sse42)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_sse42_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_sse42_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_sse42_cached) = ( c1.u.ecx.sse42 != 0);
}/* is_sse42 */

/*
 * test(aes)
 */
int
X86IDFN(is_aes)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_aes_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_aes_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_aes_cached) = ( c1.u.ecx.aes != 0);
}/* is_aes */

/*
 * test(avx)
 */
int
X86IDFN(is_avx)(void)
{
  ICPU1 c1;

  if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_avx_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_avx_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  if( !c1.u.ecx.avx )
    return X86IDFN(is_avx_cached) = 0;
  /* see whether the OS will save the ymm state */
  if( !c1.u.ecx.osxsave )
    return X86IDFN(is_avx_cached) = 0;

  return X86IDFN(is_avx_cached) = is_xcr_set(0, xcr0_mask_YMM);
}/* is_avx */
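
/*
 * For reference, a minimal standalone sketch of the same AVX check outside
 * this library (comment only; __get_cpuid(), bit_AVX, bit_OSXSAVE and
 * _xgetbv() are assumptions about the compiler's headers, not part of this
 * file):
 *
 *   unsigned int a, b, c, d;
 *   int have_avx = 0;
 *   if (__get_cpuid(1, &a, &b, &c, &d) &&
 *       (c & bit_AVX) && (c & bit_OSXSAVE)) {
 *     // XCR0 bits 1 (XMM) and 2 (YMM) must both be enabled by the OS
 *     have_avx = (_xgetbv(0) & 0x6) == 0x6;
 *   }
 */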


/*
 * test(avx2)
 */
int
X86IDFN(is_avx2)(void)
{
  ICPU7 c7;

  if ( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_avx2_cached) = 0;

  if ( !X86IDFN(is_avx)() )
    return X86IDFN(is_avx2_cached) = 0;

  if ( __pgi_cpuid_ecx( 7, c7.i, 0 ) == 0 )
    return X86IDFN(is_avx2_cached) = 0;

  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c7.i[0], c7.i[1], c7.i[2], c7.i[3] );

  return X86IDFN(is_avx2_cached) = (c7.u.ebx.avx2 != 0);
}/* is_avx2 */

/*
 * test(avx512)
 * Determine whether processor and O/S support AVX512.
 */
int
X86IDFN(is_avx512)(void)
{
  if( !X86IDFN(is_intel)() )
    return X86IDFN(is_avx512_cached) = 0;

  if ( !X86IDFN(is_avx)() )
    return X86IDFN(is_avx512_cached) = 0;

  return X86IDFN(is_avx512_cached) = is_xcr_set(0, xcr0_mask_ZMM);
}

/*
 * test(avx512f)
 */
int
X86IDFN(is_avx512f)(void)
{
  ICPU7 c7;

  if ( !X86IDFN(is_avx512)() )
    return X86IDFN(is_avx512f_cached) = 0;
  if( __pgi_cpuid_ecx( 7, c7.i, 0 ) == 0 )
    return X86IDFN(is_avx512f_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c7.i[0], c7.i[1], c7.i[2], c7.i[3] );
  return X86IDFN(is_avx512f_cached) = ( c7.u.ebx.avx512f != 0);
}/* is_avx512f */

/*
 * test(avx512vl)
 */
int
X86IDFN(is_avx512vl)(void)
{
  ICPU7 c7;

  if( !X86IDFN(is_avx512f)() )
    return X86IDFN(is_avx512vl_cached) = 0;
  if( __pgi_cpuid_ecx( 7, c7.i, 0 ) == 0 )
    return X86IDFN(is_avx512vl_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c7.i[0], c7.i[1], c7.i[2], c7.i[3] );
  return X86IDFN(is_avx512vl_cached) = ( c7.u.ebx.avx512vl != 0);
}/* is_avx512vl */

/*
 * test(f16c)
 */
int
X86IDFN(is_f16c)(void)
{
  ICPU1 c1;

  if ( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_f16c_cached) = 0;

  if ( !X86IDFN(is_avx)() )
    return X86IDFN(is_f16c_cached) = 0;

  if ( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_f16c_cached) = 0;

  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );

  return X86IDFN(is_f16c_cached) = (c1.u.ecx.f16c != 0);
}/* is_f16c */

/*
 * test(fma)
 */
int
X86IDFN(is_fma)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
    return X86IDFN(is_fma_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_fma_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_fma_cached) = ( c1.u.ecx.fma != 0);
}/* is_fma */

/*
 * test(fma4)
 */
int
X86IDFN(is_fma4)(void)
{
  CPU80 c80;
  ACPU81 c81;
  if( !X86IDFN(is_amd)() )
    return X86IDFN(is_fma4_cached) = 0;
  if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
    return X86IDFN(is_fma4_cached) = 0;
  if( c80.b.largest < 0x80000001 )
    return X86IDFN(is_fma4_cached) = 0;
  if( X86IDFN(idcache)( 0x80000001, c81.i ) == 0 )
    return X86IDFN(is_fma4_cached) = 0;
  return X86IDFN(is_fma4_cached) = ( c81.u.ecx.fma4 != 0);
}/* is_fma4 */

/*
 * test(ht)
 * call with Intel
 * test the htt bit; if set, return the logical processor count from ebx
 */
int
X86IDFN(is_ht)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() )
    return X86IDFN(is_ht_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_ht_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  if( c1.u.edx.htt )
    return X86IDFN(is_ht_cached) = c1.u.ebx.proccount;
  return X86IDFN(is_ht_cached) = 0;
}/* is_ht */

/*
 * test(athlon)
 * test AMD
 * test family==15, or family==6 and model == 1,2,4,6
 */
int
X86IDFN(is_athlon)(void)
{
  ACPU1 c1;
  if( !X86IDFN(is_amd)() )
    return X86IDFN(is_athlon_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_athlon_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  if( c1.u.eax.family == 15 )
    return X86IDFN(is_athlon_cached) = 1;
  if( c1.u.eax.family != 6 )
    return X86IDFN(is_athlon_cached) = 0;
  switch( c1.u.eax.model ){
  case 1 :
  case 2 :
  case 4 :
  case 6 :
    return X86IDFN(is_athlon_cached) = 1;
  }
  return X86IDFN(is_athlon_cached) = 0;
}/* is_athlon */

/*
 * test(hammer)
 * test for AMD
 * test for family == 15
 */
int
X86IDFN(is_hammer)(void)
{
  ACPU1 c1;
  if( !X86IDFN(is_amd)() )
    return X86IDFN(is_hammer_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_hammer_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_hammer_cached) = ( c1.u.eax.family == 15 );
}/* is_hammer */

/*
 * test(gh)
 * test for AMD
 * test for family == 16 (base family 15, extended family 1)
 */
int
X86IDFN(is_gh)(void)
{
  ACPU1 c1;
  if( !X86IDFN(is_amd)() )
    return X86IDFN(is_gh_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_gh_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_gh_cached) = ( c1.u.eax.family == 15 && c1.u.eax.extfamily == 1);
}/* is_gh */

/*
 * test(gh-a)
 * test for gh
 * test for model == 0
 */
int
X86IDFN(is_gh_a)(void)
{
  ACPU1 c1;
  if( !X86IDFN(is_gh)() )
    return X86IDFN(is_gh_a_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_gh_a_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_gh_a_cached) = ( c1.u.eax.model == 0 );
}/* is_gh_a */

/*
 * test(gh-b)
 * test for gh
 * test for model >= 2
 */

/*
 * Code from rte/pgc/hammer/src/cpuinfo.c
 *
 * {
 *   CPUID c1;
 *   CPUMODEL m1;
 *   ACPU81 c81;
 *
 *   if (!__pgi_is_gh())
 *     return 0;
 *
 *   if (X86IDFN(idcache)(1, c1.i) == 0)
 *     return 0;
 *
 *   m1.i = c1.reg.eax;
 *
 *   if (m1.bits.model >= 2) {
 *     if (X86IDFN(idcache)(0x80000001, c81.i) == 0)
 *       return 0;
 *     if (c81.u.ecx.mas) {
 *       return 1;
 *     }
 *   }
 *
 *   return 0;
 * }
 */

int
X86IDFN(is_gh_b)(void)
{
  ACPU1 c1;
  if( !X86IDFN(is_gh)() )
    return X86IDFN(is_gh_b_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_gh_b_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_gh_b_cached) = ( c1.u.eax.model >= 2 );
}/* is_gh_b */

/*
 * test(shanghai)
 * test for shanghai
 * test for is a gh, and cache size >= 6MB
 */
int
X86IDFN(is_shanghai)(void)
{
  CPU80 c80;
  ACPU86 c86;
  if( !X86IDFN(is_gh)() )
    return X86IDFN(is_shanghai_cached) = 0;
  if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
    return X86IDFN(is_shanghai_cached) = 0;
  if( c80.b.largest < 0x80000006U )
    return X86IDFN(is_shanghai_cached) = 0;
  if( X86IDFN(idcache)( 0x80000006U, c86.i ) == 0 )
    return X86IDFN(is_shanghai_cached) = 0;
  return X86IDFN(is_shanghai_cached) = ( c86.u.l3cache.size >= 6 );
}/* is_shanghai */

/*
 * test(istanbul)
 * test for istanbul
 * test for is a shanghai, and model > 4
 */
int
X86IDFN(is_istanbul)(void)
{
  ACPU1 c1;
  if( !X86IDFN(is_shanghai)() )
    return X86IDFN(is_istanbul_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_istanbul_cached) = 0;
  return X86IDFN(is_istanbul_cached) = ( c1.u.eax.model > 4 );
}/* is_istanbul */


/*
 * test(bulldozer)
 * test for bulldozer
 * test for family == 21 (base family 15, extended family 6)
 */
int
X86IDFN(is_bulldozer)(void)
{
  ACPU1 c1;

  if ( (X86IDFN(is_amd)() == 0) || (X86IDFN(idcache)( 1, c1.i ) == 0)) {
    return X86IDFN(is_bulldozer_cached) = 0;
  }
  DEBUG_PRINTF("eax %8.8x ebx %8.8x ecx %8.8x edx %8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_bulldozer_cached) = ( c1.u.eax.family == 15 && c1.u.eax.extfamily == 6);
}/* is_bulldozer */

/*
 * test(piledriver)
 * test for bulldozer & fma
 */
int
X86IDFN(is_piledriver)(void)
{
  return X86IDFN(is_piledriver_cached) = ( X86IDFN(is_bulldozer)() && X86IDFN(is_fma)() );
}/* is_piledriver */

/*
 * test(k7)
 * test AMD
 * test family == 6
 */
int
X86IDFN(is_k7)(void)
{
  ACPU1 c1;
  if( !X86IDFN(is_amd)() )
    return X86IDFN(is_k7_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_k7_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_k7_cached) = ( c1.u.eax.family == 6 );
}/* is_k7 */

/*
 * test(ia32e)
 * test Intel
 * test family == 15 and lm
 */
int
X86IDFN(is_ia32e)(void)
{
  ICPU1 c1;
  CPU80 c80;
  ICPU81 c81;
  if( !X86IDFN(is_intel)() )
    return X86IDFN(is_ia32e_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_ia32e_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  if( c1.u.eax.family != 15 )
    return X86IDFN(is_ia32e_cached) = 0;
  if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
    return X86IDFN(is_ia32e_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x", c80.i[0] );
  if( c80.b.largest < 0x80000001 )
    return X86IDFN(is_ia32e_cached) = 0; /* no extended flags */
  if( X86IDFN(idcache)( 0x80000001, c81.i ) == 0 )
    return X86IDFN(is_ia32e_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c81.i[0], c81.i[1], c81.i[2], c81.i[3] );
  return X86IDFN(is_ia32e_cached) = ( c81.u.edx.lm != 0);
}/* is_ia32e */

/*
 * test(p4)
 * test Intel
 * test family == 15
 */
int
X86IDFN(is_p4)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() )
    return X86IDFN(is_p4_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_p4_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  return X86IDFN(is_p4_cached) = ( c1.u.eax.family == 15 );
}/* is_p4 */

/*
 * test(knl)
 * test Intel
 * test family == 6 && model == 0x57
 */
int
X86IDFN(is_knl)(void)
{
  ICPU1 c1;
  if( !X86IDFN(is_intel)() )
    return X86IDFN(is_knl_cached) = 0;
  if( X86IDFN(idcache)( 1, c1.i ) == 0 )
    return X86IDFN(is_knl_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
  if( c1.u.eax.family == 6 ){
    int model = ((int)c1.u.eax.extmodel << 4) + (int)c1.u.eax.model;
    return X86IDFN(is_knl_cached) = ( model == 0x57 );
  }
  return X86IDFN(is_knl_cached) = 0;
}/* is_knl */
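
/*
 * Worked example of the model computation above: when family == 6, the
 * displayed model is (extended_model << 4) + model.  Knights Landing reports
 * extmodel == 0x5 and model == 0x7, so (0x5 << 4) + 0x7 == 0x57.
 */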

/*
 * either manufacturer
 * test for lm flag in extended features
 */
int
X86IDFN(is_x86_64)(void)
{
  CPU80 c80;
  ICPU81 c81;

  if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
    return X86IDFN(is_x86_64_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x", c80.i[0] );
  if( c80.b.largest < 0x80000001 )
    return X86IDFN(is_x86_64_cached) = 0;
  if( X86IDFN(idcache)( 0x80000001, c81.i ) == 0 )
    return X86IDFN(is_x86_64_cached) = 0;
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c81.i[0], c81.i[1], c81.i[2], c81.i[3] );
  return X86IDFN(is_x86_64_cached) = ( c81.u.edx.lm != 0);
}/* is_x86_64 */

/*
 * Initialize global variable X86IDFN(hw_features).
 */

uint32_t
X86IDFN(init_hw_features)(uint32_t old_hw_features)
{
  if (X86IDFN(is_sse3)()) { // Implies SSE, SSE2, SSE3
    X86IDFN(hw_features) |= HW_SSE;
  }

  /*
   * SSE4A support on AMD processors does not necessarily imply support
   * for SSSE3.
   */
  if (X86IDFN(is_ssse3)()) {
    X86IDFN(hw_features) |= HW_SSSE3;
  }

  if (X86IDFN(is_sse42)()) { // Implies SSE4A, SSE41, and SSE42
    X86IDFN(hw_features) |= HW_SSE4;
  }

  if (X86IDFN(is_avx)()) {
    X86IDFN(hw_features) |= HW_AVX;
  }

  if (X86IDFN(is_avx2)()) {
    X86IDFN(hw_features) |= HW_AVX2;
  }

  if (X86IDFN(is_avx512)()) {
    X86IDFN(hw_features) |= HW_AVX512;
  }

  if (X86IDFN(is_avx512f)()) {
    X86IDFN(hw_features) |= HW_AVX512F;
  }

  if (X86IDFN(is_avx512vl)()) {
    X86IDFN(hw_features) |= HW_AVX512VL;
  }

  if (X86IDFN(is_fma)()) {
    X86IDFN(hw_features) |= HW_FMA;
  }

  if (X86IDFN(is_fma4)()) {
    X86IDFN(hw_features) |= HW_FMA4;
  }

  if (X86IDFN(is_knl)()) {
    X86IDFN(hw_features) |= HW_KNL;
  }

  if (X86IDFN(is_f16c)()) {
    X86IDFN(hw_features) |= HW_F16C;
  }

  if (old_hw_features != X86IDFN(hw_features)) {
    return X86IDFN(hw_features);
  }

  /*
   * Either the processor does not have at least SSE3 support, or this
   * routine has now been called twice with the same input argument.
   * Abort and avoid an infinite loop since nothing is going to change.
   */

#if defined(TARGET_WIN_X8664) && ! defined(_NO_CRT_STDIO_INLINE)
  /*
   * Exception! Windows - building x86id.obj for libcpuid.lib:
   * It is unclear why fprintf() can't be used when x86id.c is being
   * compiled for libcpuid.lib.
   */
  printf("Error: %s called twice with hw_features=%#x\n", __func__,
         X86IDFN(hw_features));
#else
  // All other architectures/platforms/libraries can safely use fprintf().
  fprintf(stderr, "Error: %s called twice with hw_features=%#x\n", __func__,
          X86IDFN(hw_features));
#endif
  exit(EXIT_FAILURE); // XXX XXX - should be __abort(1, "some string");

}/* init_hw_features */
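
/*
 * Illustrative call pattern (comment only; HW_AVX2 etc. come from x86id.h
 * and the exact caller protocol is an assumption, not something this file
 * defines):
 *
 *   uint32_t features = X86IDFN(init_hw_features)(0);
 *   if (features & HW_AVX2) {
 *     // safe to execute AVX2 code on this processor and O/S
 *   }
 */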

/*
 * Locally defined functions.
 */


/*
 * for Intel processors, the values returned by cpuid(2)
 * are an encoding of the cache size, as below;
 * other values encode TLB sizes, etc.
 */
static int
ia_cachecode( int code )
{
  switch( code ){
  case 0x39:
  case 0x3b:
  case 0x41:
  case 0x79:
  case 0x81:
    return 128*1024;     /*"128KB L2 cache"*/
  case 0x3c:
  case 0x42:
  case 0x7a:
  case 0x82:
    return 256*1024;     /*"256KB L2 cache"*/
  case 0x43:
  case 0x7b:
  case 0x7f:
  case 0x83:
  case 0x86:
    return 512*1024;     /*"512KB L2 cache"*/
  case 0x44:
  case 0x7c:
  case 0x84:
  case 0x87:
    return 1024*1024;    /*"1MB L2 cache"*/
  case 0x45:
  case 0x7d:
  case 0x85:
    return 2048*1024;    /*"2MB L2 cache"*/
  case 0x4e:
    return 6*1024*1024;  /*"6MB L2 cache"*/
  case 0xe4:
    return 8*1024*1024;  /*"8MB L3 cache"*/
  }
  return 0;
}/* ia_cachecode */

/*
 * return cache size for Intel processors
 */
static int
ia_cachesize(void)
{
  CPU0 c0;
  ICPU2 c2;
  CPU80 c80;
  ICPU86 c86;
  ICPU4 c4;
  int i, n, r;

  if( X86IDFN(idcache)( 0, c0.i ) == 0 )
    return 0;
  if (c0.b.largest >= 4) {
    r = ia_unifiedcache();
    if (r) {
      return r;
    }
  }
  if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
    return 0;
  DEBUG_PRINTF("eax %#8.8x", c80.i[0] );
  if( c80.b.largest >= 0x80000006 ){
    if( X86IDFN(idcache)( 0x80000006, c86.i ) ){
      DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
                   c86.i[0], c86.i[1], c86.i[2], c86.i[3] );
      return c86.u.ecx.size * 1024;
    }
  }

  DEBUG_PRINTF("largest=%d", c0.b.largest );

  if( c0.b.largest < 2 )
    return 0;

  X86IDFN(idcache)( 2, c2.i );
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c2.i[0], c2.i[1], c2.i[2], c2.i[3] );
  n = c2.u[0].c1;
  while( n-- ){
    for( i = 0; i < 4; ++i ){
      if( c2.u[i].invalid == 0 ){
        if( i > 0 ){ /* 1st byte in eax is something else */
          r = ia_cachecode( c2.u[i].c1 );
          if( r )
            return r;
        }
        r = ia_cachecode( c2.u[i].c2 );
        if( r )
          return r;
        r = ia_cachecode( c2.u[i].c3 );
        if( r )
          return r;
        r = ia_cachecode( c2.u[i].c4 );
        if( r )
          return r;
      }
    }
    X86IDFN(idcache)( 2, c2.i );
    DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
                 c2.i[0], c2.i[1], c2.i[2], c2.i[3] );
  }
  return 0;
}/* ia_cachesize */

static int
ia_unifiedcache(void)
{
  ICPU4 c4;
  int n;
  int i;
  int r, r2, r3;
  /* cache size information available */

  r2 = r3 = 0;
  for (i = 0; i <= 3; i++) {
    __pgi_cpuid_ecx( 4, c4.i, i );
    DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
                 c4.i[0], c4.i[1], c4.i[2], c4.i[3] );
    switch (c4.u.eax.cachetype) {
    default:
      goto done;
    case 1:
      /*
      printf("Data Cache\n");
      printf("+++ level  %d\n", c4.u.eax.cachelevel);
      printf("+++ #bytes %d\n",
             ( (c4.u.ebx.assoc+1) *
               (c4.u.ebx.partitions+1) *
               (c4.u.ebx.linesize+1) *
               (c4.u.nsets+1) )
            );
      */
      break;
    case 2:
      /*
      printf("Instruction Cache\n");
      printf("+++ level  %d\n", c4.u.eax.cachelevel);
      */
      break;
    case 3:
      /*
      printf("Unified Cache\n");
      printf("+++ level  %d\n", c4.u.eax.cachelevel);
      printf("+++ #bytes %d\n",
             ( (c4.u.ebx.assoc+1) *
               (c4.u.ebx.partitions+1) *
               (c4.u.ebx.linesize+1) *
               (c4.u.nsets+1) )
            );
      */
      r = (c4.u.ebx.assoc+1) *
          (c4.u.ebx.partitions+1) *
          (c4.u.ebx.linesize+1) *
          (c4.u.nsets+1);
      if (c4.u.eax.cachelevel == 2)
        r2 = r;
      else if (c4.u.eax.cachelevel == 3) {
        r3 = r;
      }
      break;
    }
  }
done:
  if (r3)
    return r3;
  return r2;
}
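
/*
 * Worked example of the leaf-4 size computation above: each field is stored
 * minus one, so a 16-way, 64-byte-line, single-partition cache with 1024 sets
 * reports assoc=15, linesize=63, partitions=0, nsets=1023, and
 *
 *   (15+1) * (0+1) * (63+1) * (1023+1) = 16 * 1 * 64 * 1024 = 1048576 bytes (1MB)
 */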

/*
 * return cache size for AMD processors
 */
static int
amd_cachesize(void)
{
  CPU80 c80;
  ACPU86 c86;

  if( X86IDFN(idcache)( 0x80000000U, c80.i ) == 0 )
    return 0;
  DEBUG_PRINTF("largest=%#8.8x", c80.b.largest );
  if( c80.b.largest < 0x80000006U )
    return 0;
  if( X86IDFN(idcache)( 0x80000006U, c86.i ) == 0 )
    return 0;
  if( c86.u.l3cache.size ) {
    return c86.u.l3cache.size * 512 * 1024;
  }
  return c86.u.l2cache.size * 1024;
}/* amd_cachesize */
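
/*
 * Units in the return statements above: CPUID 0x80000006 reports the L3 size
 * in 512KB blocks and the L2 size in KB, so e.g. an l3cache.size of 12 means
 * 12 * 512 * 1024 = 6MB, and an l2cache.size of 512 means 512KB.
 */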

/*
 * test(cachesize)
 * return intel or amd cache size
 */
int
X86IDFN(get_cachesize)(void)
{
  if( X86IDFN(is_intel)() )
    return ia_cachesize();
  if( X86IDFN(is_amd)() )
    return amd_cachesize();
  return 0;
}/* get_cachesize */

/*
 * return cores for Intel processors
 */
static int
ia_cores(void)
{
  CPU0 c0;
  ICPU4 c4;
  int i, n, r;

  if( X86IDFN(idcache)( 0, c0.i ) == 0 )
    return 0;
  DEBUG_PRINTF("largest=%d", c0.b.largest );

  if( c0.b.largest < 4 )
    return 0;

  __pgi_cpuid_ecx( 4, c4.i, 0 );
  DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
               c4.i[0], c4.i[1], c4.i[2], c4.i[3] );
  return c4.u.eax.ncores + 1;
}/* ia_cores */

/*
 * return cores for AMD processors
 */
static int
amd_cores(void)
{
  CPU80 c80;
  ACPU88 c88;

  if( X86IDFN(idcache)( 0x80000000U, c80.i ) == 0 )
    return 0;
  DEBUG_PRINTF("largest=%d", c80.b.largest );
  if( c80.b.largest < 0x80000008U )
    return 0;
  if( X86IDFN(idcache)( 0x80000008U, c88.i ) == 0 )
    return 0;
  return c88.u.ecx.cores + 1;
}/* amd_cores */

/*
 * test(cpuname)
 * return processor name string
 */
static char processor_name[50];
char *
X86IDFN(get_processor_name)(void)
{
  CPU80 c80;
  int i;
  if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
    return 0;
  DEBUG_PRINTF("eax %#8.8x", c80.i[0] );
  if( c80.b.largest < 0x80000004 ){
    processor_name[0] = '\0';
    return processor_name;  /* no processor name string */
  }
  if( X86IDFN(idcache)( 0x80000002, (unsigned int*)(processor_name+0) ) == 0 ){
    processor_name[0] = '\0';
    return processor_name;  /* no processor name string */
  }
  if( X86IDFN(idcache)( 0x80000003, (unsigned int*)(processor_name+16) ) == 0 ){
    processor_name[0] = '\0';
    return processor_name;  /* no processor name string */
  }
  if( X86IDFN(idcache)( 0x80000004, (unsigned int*)(processor_name+32) ) == 0 ){
    processor_name[0] = '\0';
    return processor_name;  /* no processor name string */
  }
  processor_name[48] = '\0';
  for( i = 0; i < 48; ++i ){
    if( processor_name[i] != ' ' )
      return processor_name+i;
  }
  return processor_name;
}/* get_processor_name */
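
/*
 * Illustrative usage sketch (comment only): each of the 0x80000002-0x80000004
 * leaves fills 16 bytes of the 48-byte brand string, and the returned pointer
 * skips any leading spaces, so a caller can simply do:
 *
 *   printf("Running on: %s\n", X86IDFN(get_processor_name)());
 */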