1 /*
2  * Copyright (c) 2007-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 
19 #include <stdio.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 
23 #if     defined(TARGET_WIN_X8664)
24 #   if      defined(OBJ_WIN_X8664_IS_X86ID)
25 #       error   object macro OBJ_WIN_X8664_IS_X86ID cannot already be defined
26 #   else
27 #       define  OBJ_WIN_X8664_IS_X86ID
28 #   endif
29 #endif
30 
31 #include "pgcpuid.h"
32 #include "x86id.h"
33 
34 /*
35  * Define some interesting fields in the extended control register[0].
36  * xcr[0] only defines the lower 32-bits of the 64-bit register.
37  */
38 
39 #define	xcr0_bit_XMM	0x00000002U
40 #define	xcr0_bit_YMM	0x00000004U
41 #define	xcr0_bit_ZMMK	0x00000020U
42 #define	xcr0_bit_ZMMLO 	0x00000040U
43 #define	xcr0_bit_ZMMHI 	0x00000080U
44 
45 #define	xcr0_mask_YMM	(xcr0_bit_XMM | xcr0_bit_YMM)
46 #define	xcr0_mask_ZMM	(xcr0_bit_ZMMK | xcr0_bit_ZMMLO | xcr0_bit_ZMMHI)
47 
48 
49 #define signature_AMD_ebx	0x68747541
50 #define signature_AMD_ecx	0x444d4163
51 #define signature_AMD_edx	0x69746e65
52 
53 #define signature_INTEL_ebx	0x756e6547
54 #define signature_INTEL_ecx	0x6c65746e
55 #define signature_INTEL_edx	0x49656e69
56 
57 //#define DEBUG
58 #if     defined(DEBUG)
59 #include    <string.h>
/* Usage: DEBUG_PRINTF("format %s with any arguments %d but no endline",
 *              string, integer );
 */
63 #define DEBUG_PRINTF(...)                                            \
64     do { fputs(__func__, stdout);                                    \
65          fputs(strlen(__func__) > 7 ? ":\t" : ":\t\t", stdout);      \
66          printf(__VA_ARGS__);                                        \
67          fputs("\n", stdout); } while (0)
68 #else
69 #define DEBUG_PRINTF(...)
70 #endif
71 
72 /*
73  * prototypes for the test functions here
74  */
75 static int ia_cachesize(void);
76 static int ia_unifiedcache(void);
77 static int amd_cachesize(void);
78 static int ia_cores(void);
79 static int amd_cores(void);
80 static int is_xcr_set(uint32_t, uint64_t);
81 static int is_amd_family(uint32_t, uint32_t *);
82 
83 /*
84  * Various routines in the runtime libraries are needing to detect what processor type/model/feature
85  * they are running on.  Instead of using relatively heavy weight routines to return that information,
86  * provide a mechanism to cache the data.
87  *
88  * The "X86IDFN(is_<TYPE/MODEL/FEATURE>)" is the routine that is called, cache that info in global
89  * variable X86IDFN(is_<TYPE/MODEL/FEATURE>_cached).
90  *
 * Use macro IS_X86ID(<TYPE/MODEL/FEATURE>) to read the cached value,
 * falling back to the corresponding is_*() routine when still undefined.
92  */
93 
94 uint32_t X86IDFN(hw_features)    	= 0;
95 int X86IDFN(is_intel_cached)    	= X86ID_IS_CACHED_UNDEF;
96 int X86IDFN(is_amd_cached)      	= X86ID_IS_CACHED_UNDEF;
97 int X86IDFN(is_ip6_cached)      	= X86ID_IS_CACHED_UNDEF;
98 int X86IDFN(is_sse_cached)      	= X86ID_IS_CACHED_UNDEF;
99 int X86IDFN(is_sse2_cached)     	= X86ID_IS_CACHED_UNDEF;
100 int X86IDFN(is_sse3_cached)     	= X86ID_IS_CACHED_UNDEF;
101 int X86IDFN(is_ssse3_cached)    	= X86ID_IS_CACHED_UNDEF;
102 int X86IDFN(is_sse4a_cached)    	= X86ID_IS_CACHED_UNDEF;
103 int X86IDFN(is_sse41_cached)    	= X86ID_IS_CACHED_UNDEF;
104 int X86IDFN(is_sse42_cached)    	= X86ID_IS_CACHED_UNDEF;
105 int X86IDFN(is_aes_cached)      	= X86ID_IS_CACHED_UNDEF;
106 int X86IDFN(is_avx_cached)      	= X86ID_IS_CACHED_UNDEF;
107 int X86IDFN(is_avx2_cached)     	= X86ID_IS_CACHED_UNDEF;
108 int X86IDFN(is_avx512_cached)     	= X86ID_IS_CACHED_UNDEF;
109 int X86IDFN(is_avx512f_cached)  	= X86ID_IS_CACHED_UNDEF;
110 int X86IDFN(is_avx512vl_cached) 	= X86ID_IS_CACHED_UNDEF;
111 int X86IDFN(is_fma_cached)      	= X86ID_IS_CACHED_UNDEF;
112 int X86IDFN(is_fma4_cached)     	= X86ID_IS_CACHED_UNDEF;
113 int X86IDFN(is_ht_cached)       	= X86ID_IS_CACHED_UNDEF;
114 int X86IDFN(is_athlon_cached)   	= X86ID_IS_CACHED_UNDEF;
115 int X86IDFN(is_hammer_cached)   	= X86ID_IS_CACHED_UNDEF;
116 int X86IDFN(is_gh_cached)       	= X86ID_IS_CACHED_UNDEF;
117 int X86IDFN(is_gh_a_cached)     	= X86ID_IS_CACHED_UNDEF;
118 int X86IDFN(is_gh_b_cached)     	= X86ID_IS_CACHED_UNDEF;
119 int X86IDFN(is_shanghai_cached) 	= X86ID_IS_CACHED_UNDEF;
120 int X86IDFN(is_istanbul_cached) 	= X86ID_IS_CACHED_UNDEF;
121 int X86IDFN(is_bulldozer_cached) 	= X86ID_IS_CACHED_UNDEF;
122 int X86IDFN(is_piledriver_cached) 	= X86ID_IS_CACHED_UNDEF;
123 int X86IDFN(is_k7_cached)       	= X86ID_IS_CACHED_UNDEF;
124 int X86IDFN(is_ia32e_cached)    	= X86ID_IS_CACHED_UNDEF;
125 int X86IDFN(is_p4_cached)       	= X86ID_IS_CACHED_UNDEF;
126 int X86IDFN(is_knl_cached)      	= X86ID_IS_CACHED_UNDEF;
127 int X86IDFN(is_x86_64_cached)    	= X86ID_IS_CACHED_UNDEF;
128 int X86IDFN(is_f16c_cached)      	= X86ID_IS_CACHED_UNDEF;
129 
130 /*
131  * Return whether extended control register has requested bits set.
132  * Assumes that the processor has the xgetbv instruction.
133  * Return:  0 == register does not have bit(s) set or __pgi_getbv() failed.
134  *          1 == bits set.
135  */
136 
137 static
is_xcr_set(uint32_t xcr_indx,uint64_t xcr_mask)138 int is_xcr_set(uint32_t xcr_indx, uint64_t xcr_mask)
139 {
140     uint64_t xcr;
141 
142     if( __pgi_getbv( xcr_indx, &xcr ) == 0 ) {
143         DEBUG_PRINTF("_pgi_getbv() failed xcr_indx=%#8.8x, "
144             "xcr_mask=%#16.16lx", xcr_indx, xcr_mask);
145         return 0;
146     }
147 
148     DEBUG_PRINTF("xcr[%u]=%#16.16x, xcr_mask=%#16.16lx",
149         xcr_indx, xcr, xcr_mask);
150     return (xcr & xcr_mask) == xcr_mask;
151 }
152 
153 /*
154  * cache values returned from __pgi_cpuid.
155  * cpuid instructions on Windows are costly
156  */
157 int
X86IDFN(idcache)158 X86IDFN(idcache)(uint32_t f, uint32_t *r)
159 {
160   int j, rv = 1;
161   static struct{
162     int set;
163     uint32_t f;
164     uint32_t i[4];
165   }saved[] = {
166     { 0, 0U, { 0, 0, 0, 0}}, //
167     { 0, 1U, { 0, 0, 0, 0}}, //
168     { 0, 2U, { 0, 0, 0, 0}}, //
169     { 0, 0x80000000U, { 0, 0, 0, 0}}, //
170     { 0, 0x80000001U, { 0, 0, 0, 0}}, //
171     { 0, 0x80000002U, { 0, 0, 0, 0}}, //
172     { 0, 0x80000003U, { 0, 0, 0, 0}}, //
173     { 0, 0x80000004U, { 0, 0, 0, 0}}, //
174     { 0, 0x80000006U, { 0, 0, 0, 0}}, //
175     { 0, 0x80000008U, { 0, 0, 0, 0}}, //
176     {-1, 0U, { 0, 0, 0, 0}} };
177   for (j = 0; saved[j].set >= 0; ++j) {
178     if (saved[j].f == f) {
179       if (!saved[j].set) {
180         /* call cpuid once, save its value */
181         rv = __pgi_cpuid(f, saved[j].i);
182         saved[j].set = 1;
183       }
184       /* return the saved value */
185       r[0] = saved[j].i[0];
186       r[1] = saved[j].i[1];
187       r[2] = saved[j].i[2];
188       r[3] = saved[j].i[3];
189       break;
190     } else if (saved[j].set == -1) {
191       /* we're not caching this value */
192       rv = __pgi_cpuid(f, r);
193       break;
194     }
195   }
196   return rv;
197 }
198 
199 /*
200  * is_amd_family(uint32_t family, uint32_t * model)
201  * Return true if processor is AMD and of specific family.
202  * Always return model.
203  */
204 
205 static
is_amd_family(uint32_t family,uint32_t * model)206 int is_amd_family(uint32_t family, uint32_t *model)
207 {
208     ACPU1 c1;
209 
210     if ((X86IDFN(is_amd)() == 0) || (X86IDFN(idcache)( 1, c1.i ) == 0)) {
211         return 0;
212     }
213 
214     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
215 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
216     *model = c1.u.eax.model;
217     return ( c1.u.eax.family == family);
218 }
219 
220 /*
221  * Check that this is a Genuine Intel processor
222  */
223 int
X86IDFN(is_intel)224 X86IDFN(is_intel)(void)
225 // is_intel:       eax 0x00000014 ebx 0x756e6547 ecx 0x6c65746e edx 0x49656e69
226 // is_intel:       eax 0x00000014 ebx 0x756e6547 ecx 0x49656e69 edx 0x6c65746e
227 {
228     unsigned int h;
229     CPU0 c0;
230     X86IDFN(idcache)( 0, c0.i );
231     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
232 	    c0.i[0], c0.i[1], c0.i[2], c0.i[3] );
233     X86IDFN(is_intel_cached) =
234         ((signature_INTEL_ebx ^ c0.i[1]) |
235          (signature_INTEL_ecx ^ c0.i[2]) |
236          (signature_INTEL_edx ^ c0.i[3])) == 0;
237     return X86IDFN(is_intel_cached);
238 }/* is_intel */
239 
240 /*
241  * Check that this is an Authentic AMD processor
242  */
243 int
X86IDFN(is_amd)244 X86IDFN(is_amd)(void)
245 {
246     CPU0 c0;
247     unsigned int h;
248     X86IDFN(idcache)( 0, c0.i );
249     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
250 	    c0.i[0], c0.i[1], c0.i[2], c0.i[3] );
251     X86IDFN(is_amd_cached) =
252         ((signature_AMD_ebx ^ c0.i[1]) |
253          (signature_AMD_ecx ^ c0.i[2]) |
254          (signature_AMD_edx ^ c0.i[3])) == 0;
255     return X86IDFN(is_amd_cached);
256 }/* is_amd */
257 
258 /*
259  * test(p6)
260  *  either manufacturer
261  *  cpuid(1) returns fpu and cmov flag, then must be at least p6
262  */
263 int
X86IDFN(is_ip6)264 X86IDFN(is_ip6)(void)
265 {
266     ICPU1 c1;
267 
268     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
269 	return X86IDFN(is_ip6_cached) = 0;
270     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
271         c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
272 
273     return X86IDFN(is_ip6_cached) = ( c1.u.edx.fpu && c1.u.edx.cmov );
274 }/* is_ip6 */
275 
276 /*
277  * test(sse)
278  *  call with either AMD or Intel
279  *  test sse bit, same bit for either manufacturer
280  */
281 int
X86IDFN(is_sse)282 X86IDFN(is_sse)(void)
283 {
284     ICPU1 c1;
285     if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
286         return X86IDFN(is_sse_cached) = 0;
287     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
288         return X86IDFN(is_sse_cached) = 0;
289     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
290 	    c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
291     return X86IDFN(is_sse_cached) = ( c1.u.edx.sse != 0);
292 }/* is_sse */
293 
294 /*
295  * test(sse2)
296  *  call with either AMD or Intel
297  *  test sse2 bit, same bit for either manufacturer
298  */
299 int
X86IDFN(is_sse2)300 X86IDFN(is_sse2)(void)
301 {
302     ICPU1 c1;
303     if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
304         return X86IDFN(is_sse2_cached) = 0;
305     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
306         return X86IDFN(is_sse2_cached) = 0;
307     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
308 	    c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
309     return X86IDFN(is_sse2_cached) = ( c1.u.edx.sse2 != 0);
310 }/* is_sse2 */
311 
312 /*
313  * test(sse3)
314  *  call with either AMD or Intel
315  */
316 int
X86IDFN(is_sse3)317 X86IDFN(is_sse3)(void)
318 {
319     ICPU1 c1;
320     if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
321         return X86IDFN(is_sse3_cached) = 0;
322     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
323         return X86IDFN(is_sse3_cached) = 0;
324     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
325 	    c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
326     return X86IDFN(is_sse3_cached) = ( c1.u.ecx.sse3 != 0);
327 }/* is_sse3 */
328 
329 /*
330  * test(ssse3)
331  *  call with either AMD or Intel
332  *  test ssse3 bit, same bit for either manufacturer
333  */
334 int
X86IDFN(is_ssse3)335 X86IDFN(is_ssse3)(void)
336 {
337     ICPU1 c1;
338     if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
339         return X86IDFN(is_ssse3_cached) = 0;
340     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
341         return X86IDFN(is_ssse3_cached) = 0;
342     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
343 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
344         return X86IDFN(is_ssse3_cached) = ( c1.u.ecx.ssse3 != 0);
345 }/* is_ssse3 */
346 
347 /*
348  * test(sse4a)
349  *  right now, it's just the greyhound check
350  */
351 int
X86IDFN(is_sse4a)352 X86IDFN(is_sse4a)(void)
353 {
354     CPU80 c80;
355     ACPU81 c81;
356     if( !X86IDFN(is_amd)() )
357         return X86IDFN(is_sse4a_cached) = 0;
358     if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
359         return X86IDFN(is_sse4a_cached) = 0;
360     if( c80.b.largest < 0x80000001 )
361         return X86IDFN(is_sse4a_cached) = 0;
362     if( X86IDFN(idcache)( 0x80000001, c81.i ) == 0 )
363         return X86IDFN(is_sse4a_cached) = 0;
364     return X86IDFN(is_sse4a_cached) = ( c81.u.ecx.sse4a != 0);
365 }/* is_sse4a */
366 
367 /*
368  * test(sse41)
369  *  right now, it's just the penryn check
370  */
371 int
X86IDFN(is_sse41)372 X86IDFN(is_sse41)(void)
373 {
374     ICPU1 c1;
375     if( !X86IDFN(is_intel)() )
376         return X86IDFN(is_sse41_cached) = 0;
377     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
378         return X86IDFN(is_sse41_cached) = 0;
379     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
380 	    c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
381     return X86IDFN(is_sse41_cached) = ( c1.u.ecx.sse41 != 0);
382 }/* is_sse41 */
383 
384 /*
385  * test(sse42)
386  */
387 int
X86IDFN(is_sse42)388 X86IDFN(is_sse42)(void)
389 {
390     ICPU1 c1;
391     if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
392         return X86IDFN(is_sse42_cached) = 0;
393     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
394         return X86IDFN(is_sse42_cached) = 0;
395     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
396 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
397     return X86IDFN(is_sse42_cached) = ( c1.u.ecx.sse42 != 0);
398 }/* is_sse42 */
399 
400 /*
401  * test(aes)
402  */
403 int
X86IDFN(is_aes)404 X86IDFN(is_aes)(void)
405 {
406     ICPU1 c1;
407     if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
408         return X86IDFN(is_aes_cached) = 0;
409     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
410         return X86IDFN(is_aes_cached) = 0;
411     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
412 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
413     return X86IDFN(is_aes_cached) = ( c1.u.ecx.aes != 0);
414 }/* is_aes */
415 
416 /*
417  * test(avx)
418  */
419 int
X86IDFN(is_avx)420 X86IDFN(is_avx)(void)
421 {
422     ICPU1 c1;
423 
424     if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
425         return X86IDFN(is_avx_cached) = 0;
426     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
427         return X86IDFN(is_avx_cached) = 0;
428     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
429 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
430     if( !c1.u.ecx.avx )
431         return X86IDFN(is_avx_cached) = 0;
432     /* see whether the OS will save the ymm state */
433     if( !c1.u.ecx.osxsave )
434         return X86IDFN(is_avx_cached) = 0;
435 
436     return X86IDFN(is_avx_cached) = is_xcr_set(0, xcr0_mask_YMM);
437 }/* is_avx */
438 
439 
440 /*
441  * test(avx2)
442  */
443 int
X86IDFN(is_avx2)444 X86IDFN(is_avx2)(void)
445 {
446     ICPU7 c7;
447 
448     if ( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
449         return X86IDFN(is_avx2_cached) = 0;
450 
451     if ( !X86IDFN(is_avx)() )
452         return X86IDFN(is_avx2_cached) = 0;
453 
454     if ( __pgi_cpuid_ecx( 7, c7.i, 0 ) == 0 )
455         return X86IDFN(is_avx2_cached) = 0;
456 
457     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
458 	  c7.i[0], c7.i[1], c7.i[2], c7.i[3] );
459 
460     return X86IDFN(is_avx2_cached) = (c7.u.ebx.avx2 != 0);
461 }/* is_avx2 */
462 
463 /*
464  * test(avx512)
465  * Determine whether processor and O/S support AVX512.
466  */
467 int
X86IDFN(is_avx512)468 X86IDFN(is_avx512)(void)
469 {
470     if( !X86IDFN(is_intel)() )
471         return X86IDFN(is_avx512_cached) = 0;
472 
473     if ( !X86IDFN(is_avx)() )
474         return X86IDFN(is_avx512_cached) = 0;
475 
476     return X86IDFN(is_avx512_cached) = is_xcr_set(0, xcr0_mask_ZMM);
477 }
478 
479 /*
480  * test(avx512f)
481  */
482 int
X86IDFN(is_avx512f)483 X86IDFN(is_avx512f)(void)
484 {
485     ICPU7 c7;
486 
487     if ( !X86IDFN(is_avx512)() )
488         return X86IDFN(is_avx512f_cached) = 0;
489     if( __pgi_cpuid_ecx( 7, c7.i, 0 ) == 0 )
490         return X86IDFN(is_avx512f_cached) = 0;
491     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
492 	c7.i[0], c7.i[1], c7.i[2], c7.i[3] );
493     return X86IDFN(is_avx512f_cached) = ( c7.u.ebx.avx512f != 0);
494 }/* is_avx512f */
495 
496 /*
497  * test(avx512vl)
498  */
499 int
X86IDFN(is_avx512vl)500 X86IDFN(is_avx512vl)(void)
501 {
502     ICPU7 c7;
503 
504     if( !X86IDFN(is_avx512f)() )
505         return X86IDFN(is_avx512vl_cached) = 0;
506     if( __pgi_cpuid_ecx( 7, c7.i, 0 ) == 0 )
507         return X86IDFN(is_avx512vl_cached) = 0;
508     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
509 	c7.i[0], c7.i[1], c7.i[2], c7.i[3] );
510     return X86IDFN(is_avx512vl_cached) = ( c7.u.ebx.avx512vl != 0);
511 }/* is_avx51vlf */
512 
513 /*
514  * test(f16c)
515  */
516 int
X86IDFN(is_f16c)517 X86IDFN(is_f16c)(void)
518 {
519     ICPU1 c1;
520 
521     if ( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
522         return X86IDFN(is_f16c_cached) = 0;
523 
524     if ( !X86IDFN(is_avx)() )
525         return X86IDFN(is_f16c_cached) = 0;
526 
527     if ( X86IDFN(idcache)( 1, c1.i ) == 0 )
528         return X86IDFN(is_f16c_cached) = 0;
529 
530     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
531 	  c7.i[0], c7.i[1], c7.i[2], c7.i[3] );
532 
533     return X86IDFN(is_f16c_cached) = (c1.u.ecx.f16c != 0);
534 }/* is_f16c */
535 
536 /*
537  * test(fma)
538  */
539 int
X86IDFN(is_fma)540 X86IDFN(is_fma)(void)
541 {
542     ICPU1 c1;
543     if( !X86IDFN(is_intel)() && !X86IDFN(is_amd)() )
544         return X86IDFN(is_fma_cached) = 0;
545     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
546         return X86IDFN(is_fma_cached) = 0;
547     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
548 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
549     return X86IDFN(is_fma_cached) = ( c1.u.ecx.fma != 0);
550 }/* is_fma */
551 
552 /*
553  * test(fma4)
554  */
555 int
X86IDFN(is_fma4)556 X86IDFN(is_fma4)(void)
557 {
558     CPU80 c80;
559     ACPU81 c81;
560     if( !X86IDFN(is_amd)() )
561         return X86IDFN(is_fma4_cached) = 0;
562     if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
563         return X86IDFN(is_fma4_cached) = 0;
564     if( c80.b.largest < 0x80000001 )
565         return X86IDFN(is_fma4_cached) = 0;
566     if( X86IDFN(idcache)( 0x80000001, c81.i ) == 0 )
567         return X86IDFN(is_fma4_cached) = 0;
568     return X86IDFN(is_fma4_cached) = ( c81.u.ecx.fma4 != 0);
569 }/* is_fma4 */
570 
571 /*
572  * test(ht)
573  *  call with Intel
574  *  test sse3 bit, same bit for either manufacturer
575  */
576 int
X86IDFN(is_ht)577 X86IDFN(is_ht)(void)
578 {
579     ICPU1 c1;
580     if( !X86IDFN(is_intel)() )
581         return X86IDFN(is_ht_cached) = 0;
582     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
583         return X86IDFN(is_ht_cached) = 0;
584     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
585 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
586     if( c1.u.edx.htt )
587         return X86IDFN(is_ht_cached) =  c1.u.ebx.proccount;
588     return X86IDFN(is_ht_cached) = 0;
589 }/* is_ht */
590 
591 /*
592  * test(athlon)
593  *  test AMD
594  *  test family==15, or model == 1,2,4,6
595  */
596 int
X86IDFN(is_athlon)597 X86IDFN(is_athlon)(void)
598 {
599     ACPU1 c1;
600     if( !X86IDFN(is_amd)() )
601         return X86IDFN(is_athlon_cached) = 0;
602     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
603         return X86IDFN(is_athlon_cached) = 0;
604     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
605 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
606     if( c1.u.eax.family == 15 )
607         return X86IDFN(is_athlon_cached) = 1;
608     if( c1.u.eax.family != 6 )
609         return X86IDFN(is_athlon_cached) = 0;
610     switch( c1.u.eax.model ){
611     case 1 :
612     case 2 :
613     case 4 :
614     case 6 :
615         return X86IDFN(is_athlon_cached) = 1;
616     }
617     return X86IDFN(is_athlon_cached) = 0;
618 }/* is_athlon */
619 
620 /*
621  * test(hammer)
622  *  test for AMD
623  *  test for family == 15
624  */
625 int
X86IDFN(is_hammer)626 X86IDFN(is_hammer)(void)
627 {
628     ACPU1 c1;
629     if( !X86IDFN(is_amd)() )
630         return X86IDFN(is_hammer_cached) = 0;
631     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
632         return X86IDFN(is_hammer_cached) = 0;
633     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
634 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
635     return X86IDFN(is_hammer_cached) = ( c1.u.eax.family == 15 );
636 }/* is_hammer */
637 
638 /*
639  * test(gh)
640  *  test for AMD
641  *  test for family == 16
642  */
643 int
X86IDFN(is_gh)644 X86IDFN(is_gh)(void)
645 {
646     ACPU1 c1;
647     if( !X86IDFN(is_amd)() )
648         return X86IDFN(is_gh_cached) = 0;
649     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
650         return X86IDFN(is_gh_cached) = 0;
651     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
652 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
653     return X86IDFN(is_gh_cached) = ( c1.u.eax.family == 15 && c1.u.eax.extfamily == 1);
654 }/* is_gh */
655 
656 /*
657  * test(gh-a)
658  *  test for gh
659  *  test for model == 0
660  */
661 int
X86IDFN(is_gh_a)662 X86IDFN(is_gh_a)(void)
663 {
664     ACPU1 c1;
665     if( !X86IDFN(is_gh)() )
666         return X86IDFN(is_gh_a_cached) = 0;
667     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
668         return X86IDFN(is_gh_a_cached) = 0;
669     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
670 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
671     return X86IDFN(is_gh_a_cached) = ( c1.u.eax.model == 0 );
672 }/* is_gh_a */
673 
674 /*
675  * test(gh-b)
676  *  test for gh
677  *  test for model == 1
678  */
679 
680 /*
681  *
682  * Code from rte/pgc/hammer/src/cpuinfo.c
683  *
684  * {
685  *   CPUID c1;
686  *   CPUMODEL m1;
687  *   ACPU81 c81;
688  *
689  *   if (!__pgi_is_gh())
690  *     return 0;
691  *
692  *   if (X86IDFN(idcache)(1, c1.i) == 0)
693  *     return 0;
694  *
695  *   m1.i = c1.reg.eax;
696  *
697  *   if (m1.bits.model >= 2) {
698  *     if (X86IDFN(idcache)(0x80000001, c81.i) == 0)
699  *       return 0;
700  *     if (c81.u.ecx.mas) {
701  *       return 1;
702  *     }
703  *   }
704  *
705  *   return 0;
706  * }
707  */
708 
709 int
X86IDFN(is_gh_b)710 X86IDFN(is_gh_b)(void)
711 {
712     ACPU1 c1;
713     if( !X86IDFN(is_gh)() )
714         return X86IDFN(is_gh_b_cached) = 0;
715     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
716         return X86IDFN(is_gh_b_cached) = 0;
717     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
718 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
719     return X86IDFN(is_gh_b_cached) = ( c1.u.eax.model >= 2 );
720 }/* is_gh_b */
721 
722 /*
723  * test(shanghai)
724  *  test for shanghai
725  *  test for is a gh, and cache size >= 6MB
726  */
727 int
X86IDFN(is_shanghai)728 X86IDFN(is_shanghai)(void)
729 {
730     CPU80 c80;
731     ACPU86 c86;
732     if( !X86IDFN(is_gh)() )
733         return X86IDFN(is_shanghai_cached) = 0;
734     if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
735         return X86IDFN(is_shanghai_cached) = 0;
736     if( c80.b.largest < 0x80000006U )
737         return X86IDFN(is_shanghai_cached) = 0;
738     if( X86IDFN(idcache)( 0x80000006U, c86.i ) == 0 )
739         return X86IDFN(is_shanghai_cached) = 0;
740     return X86IDFN(is_shanghai_cached) = ( c86.u.l3cache.size >= 6 );
741 }/* is_shanghai */
742 
743 /*
744  * test(istanbul)
745  *  test for istanbul
746  *  test for is a shanghai, and model > 4
747  */
748 int
X86IDFN(is_istanbul)749 X86IDFN(is_istanbul)(void)
750 {
751     ACPU1 c1;
752     if( !X86IDFN(is_shanghai)() )
753         return X86IDFN(is_istanbul_cached) = 0;
754     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
755         return X86IDFN(is_istanbul_cached) = 0;
756     return X86IDFN(is_istanbul_cached) = ( c1.u.eax.model > 4 );
757 }/* is_istanbul */
758 
759 
760 /*
761  * test(bulldozer)
762  *  test for bulldozer
763  *  test for family == 21
764  */
765 int
X86IDFN(is_bulldozer)766 X86IDFN(is_bulldozer)(void)
767 {
768     ACPU1 c1;
769 
770     if ( (X86IDFN(is_amd)() == 0) || (X86IDFN(idcache)( 1, c1.i ) == 0)) {
771         return X86IDFN(is_bulldozer_cached) = 0;
772     }
773     DEBUG_PRINTF("eax %8.8x ebx %8.8x ecx %8.8x edx %8.8x",
774 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
775     return X86IDFN(is_bulldozer_cached) = ( c1.u.eax.family == 15 && c1.u.eax.extfamily == 6);
776 }/* is_bulldozer */
777 
778 /*
779  * test(piledriver)
780  *  test for bulldozer & fma
781  */
782 int
X86IDFN(is_piledriver)783 X86IDFN(is_piledriver)(void)
784 {
785     return X86IDFN(is_piledriver_cached) = ( X86IDFN(is_bulldozer)() && X86IDFN(is_fma)() );
786 }/* is_piledriver */
787 
788 /*
789  * test(k7)
790  *  test AMD
791  *  test family == 6
792  */
793 int
X86IDFN(is_k7)794 X86IDFN(is_k7)(void)
795 {
796     ACPU1 c1;
797     if( !X86IDFN(is_amd)() )
798         return X86IDFN(is_k7_cached) = 0;
799     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
800         return X86IDFN(is_k7_cached) = 0;
801     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
802 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
803     return X86IDFN(is_k7_cached) = ( c1.u.eax.family == 6 );
804 }/* is_k7 */
805 
806 /*
807  * test(ia32e)
808  *  test Intel
809  *  test family == 15 and lm
810  */
811 int
X86IDFN(is_ia32e)812 X86IDFN(is_ia32e)(void)
813 {
814     ICPU1 c1;
815     CPU80 c80;
816     ICPU81 c81;
817     if( !X86IDFN(is_intel)() )
818         return X86IDFN(is_ia32e_cached) = 0;
819     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
820         return X86IDFN(is_ia32e_cached) = 0;
821     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
822 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
823     if( c1.u.eax.family != 15 )
824         return X86IDFN(is_ia32e_cached) = 0;
825     if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
826         return X86IDFN(is_ia32e_cached) = 0;
827     DEBUG_PRINTF("eax %#8.8x", c80.i[0] );
828     if( c80.b.largest < 0x80000001 )
829         return X86IDFN(is_ia32e_cached) = 0; /* no extended flags */
830     if( X86IDFN(idcache)( 0x80000001, c81.i ) == 0 )
831         return X86IDFN(is_ia32e_cached) = 0;
832     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
833 	c81.i[0], c81.i[1], c81.i[2], c81.i[3] );
834     return X86IDFN(is_ia32e_cached) = ( c81.u.edx.lm != 0);
835 }/* is_ia32e */
836 
837 /*
838  * test(p4)
839  *  test Intel
840  *  test family == 15
841  */
842 int
X86IDFN(is_p4)843 X86IDFN(is_p4)(void)
844 {
845     ICPU1 c1;
846     if( !X86IDFN(is_intel)() )
847         return X86IDFN(is_p4_cached) = 0;
848     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
849         return X86IDFN(is_p4_cached) = 0;
850     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
851 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
852     return X86IDFN(is_p4_cached) = ( c1.u.eax.family == 15 );
853 }/* is_p4 */
854 
855 /*
856  * test(knl)
857  *  test Intel
858  *  test family == 6 && model == 0x57
859  */
860 int
X86IDFN(is_knl)861 X86IDFN(is_knl)(void)
862 {
863     ICPU1 c1;
864     if( !X86IDFN(is_intel)() )
865         return X86IDFN(is_knl_cached) = 0;
866     if( X86IDFN(idcache)( 1, c1.i ) == 0 )
867         return X86IDFN(is_knl_cached) = 0;
868     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
869 	c1.i[0], c1.i[1], c1.i[2], c1.i[3] );
870     if( c1.u.eax.family == 6 ){
871         int model = ((int)c1.u.eax.extmodel << 4) + (int)c1.u.eax.model;
872 	    return X86IDFN(is_knl_cached) = ( model == 0x57 );
873     }
874     return X86IDFN(is_knl_cached) = 0;
875 }/* is_knl */
876 
877 /*
878  * either manufacturer
879  * test for lm flag in extended features
880  */
881 int
X86IDFN(is_x86_64)882 X86IDFN(is_x86_64)(void)
883 {
884     CPU80 c80;
885     ICPU81 c81;
886 
887     if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
888         return X86IDFN(is_x86_64_cached) = 0;
889     DEBUG_PRINTF("eax %#8.8x", c80.i[0] );
890     if( c80.b.largest < 0x80000001 )
891         return X86IDFN(is_x86_64_cached) = 0;
892     if( X86IDFN(idcache)( 0x80000001, c81.i ) == 0 )
893         return X86IDFN(is_x86_64_cached) = 0;
894     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
895 	c81.i[0], c81.i[1], c81.i[2], c81.i[3] );
896     return X86IDFN(is_x86_64_cached) = ( c81.u.edx.lm != 0);
897 }/* is_x86_64 */
898 
899 /*
900  * Initialize global variable X86IDFN(hw_features).
901  */
902 
903 uint32_t
X86IDFN(init_hw_features)904 X86IDFN(init_hw_features)(uint32_t old_hw_features)
905 {
906     if (X86IDFN(is_sse3)()) {       // Implies SSE, SSE2, SSE3
907         X86IDFN(hw_features) |= HW_SSE;
908     }
909 
910     /*
911      * AMD processors with SSE4A does not necessarily imply support for SSSE3.
912      */
913     if (X86IDFN(is_ssse3)()) {
914         X86IDFN(hw_features) |= HW_SSSE3;
915     }
916 
917     if (X86IDFN(is_sse42)()) {      // Implies SSE4A, SSE41, and SSE42
918         X86IDFN(hw_features) |= HW_SSE4;
919     }
920 
921     if (X86IDFN(is_avx)()) {
922         X86IDFN(hw_features) |= HW_AVX;
923     }
924 
925     if (X86IDFN(is_avx2)()) {
926         X86IDFN(hw_features) |= HW_AVX2;
927     }
928 
929     if (X86IDFN(is_avx512)()) {
930         X86IDFN(hw_features) |= HW_AVX512;
931     }
932 
933     if (X86IDFN(is_avx512f)()) {
934         X86IDFN(hw_features) |= HW_AVX512F;
935     }
936 
937     if (X86IDFN(is_avx512vl)()) {
938         X86IDFN(hw_features) |= HW_AVX512VL;
939     }
940 
941     if (X86IDFN(is_fma)()) {
942         X86IDFN(hw_features) |= HW_FMA;
943     }
944 
945     if (X86IDFN(is_fma4)()) {
946         X86IDFN(hw_features) |= HW_FMA4;
947     }
948 
949     if (X86IDFN(is_knl)()) {
950         X86IDFN(hw_features) |= HW_KNL;
951     }
952 
953     if (X86IDFN(is_f16c)()) {
954         X86IDFN(hw_features) |= HW_F16C;
955     }
956 
957     if (old_hw_features != X86IDFN(hw_features)) {
958         return X86IDFN(hw_features);
959     }
960 
961     /*
962      * Either the processor does not have at a minimum SSE3 support, or
963      * this routine has been now called twice with same input argument.
964      * Abort and avoid infinite loop since nothing is going to change.
965      */
966 
967 #if defined(TARGET_WIN_X8664) && ! defined(_NO_CRT_STDIO_INLINE)
968     /*
969      * Exception! Windows - building x86id.obj for libcpuid.lib:
970      * It is unclear why fprintf() can't be used when x86id.c is being
971      * compiled for libcpuid.lib.
972      */
973 
974     printf("Error: %s called twice with hw_features=%#x\n", __func__,
975         X86IDFN(hw_features));
976 #else
977     // All other architectures/platforms/libraries can safely use fprintf().
978     fprintf(stderr, "Error: %s called twice with hw_features=%#x\n", __func__,
979         X86IDFN(hw_features));
980 #endif
981     exit(EXIT_FAILURE);     // XXX XXX - should be __abort(1, "some string");
982 
983 }/* init_hw_features */
984 
985 /*
986  * Locally defined functions.
987  */
988 
989 
/*
 * For Intel processors, the bytes returned by cpuid(2) are descriptor
 * codes; some encode cache sizes, others encode TLB sizes, etc.
 * Map a descriptor code to its L2/L3 cache size in bytes, or return 0
 * for any code that does not encode a cache size of interest.
 */
static int
ia_cachecode(int code)
{
    /* descriptor-code -> cache-size lookup (sizes in bytes) */
    static const struct {
        int code;
        int size;
    } cache_tbl[] = {
        { 0x39, 128*1024 },  { 0x3b, 128*1024 },  { 0x41, 128*1024 },
        { 0x79, 128*1024 },  { 0x81, 128*1024 },                       /* 128KB L2 */
        { 0x3c, 256*1024 },  { 0x42, 256*1024 },  { 0x7a, 256*1024 },
        { 0x82, 256*1024 },                                            /* 256KB L2 */
        { 0x43, 512*1024 },  { 0x7b, 512*1024 },  { 0x7f, 512*1024 },
        { 0x83, 512*1024 },  { 0x86, 512*1024 },                       /* 512KB L2 */
        { 0x44, 1024*1024 }, { 0x7c, 1024*1024 }, { 0x84, 1024*1024 },
        { 0x87, 1024*1024 },                                           /* 1MB L2 */
        { 0x45, 2048*1024 }, { 0x7d, 2048*1024 }, { 0x85, 2048*1024 }, /* 2MB L2 */
        { 0x4e, 6*1024*1024 },                                         /* 6MB L2 */
        { 0xe4, 8*1024*1024 },                                         /* 8MB L3 */
    };
    size_t k;

    for (k = 0; k < sizeof(cache_tbl)/sizeof(cache_tbl[0]); ++k) {
        if (cache_tbl[k].code == code)
            return cache_tbl[k].size;
    }
    return 0;
}/* ia_cachecode */
1032 
1033 /*
1034  * return cache size for Intel processors
1035  */
1036 static int
ia_cachesize(void)1037 ia_cachesize(void)
1038 {
1039     CPU0 c0;
1040     ICPU2 c2;
1041     CPU80 c80;
1042     ICPU86 c86;
1043     ICPU4 c4;
1044     int i, n, r;
1045 
1046     if( X86IDFN(idcache)( 0, c0.i ) == 0 )
1047 	return 0;
1048     if (c0.b.largest >= 4) {
1049 	r = ia_unifiedcache();
1050 	if (r) {
1051 	    return r;
1052 	}
1053     }
1054     if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
1055 	return 0;
1056     DEBUG_PRINTF("eax %#8.8x", c80.i[0] );
1057     if( c80.b.largest >= 0x80000006 ){
1058 	if( X86IDFN(idcache)( 0x80000006, c86.i ) ){
1059 	    DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
1060 		c86.i[0], c86.i[1], c86.i[2], c86.i[3] );
1061 	    return c86.u.ecx.size * 1024;
1062 	}
1063     }
1064 
1065     DEBUG_PRINTF("largest=%d", c0.b.largest );
1066 
1067     if( c0.b.largest < 2 )
1068 	return 0;
1069 
1070     X86IDFN(idcache)( 2, c2.i );
1071     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
1072 	c2.i[0], c2.i[1], c2.i[2], c2.i[3] );
1073     n = c2.u[0].c1;
1074     while( n-- ){
1075 	for( i = 0; i < 4; ++i ){
1076 	    if( c2.u[i].invalid == 0 ){
1077 		if( i > 0 ){	/* 1st byte in eax is something else */
1078 		    r = ia_cachecode( c2.u[i].c1 );
1079 		    if( r )
1080 			return r;
1081 		}
1082 		r = ia_cachecode( c2.u[i].c2 );
1083 		if( r )
1084 		    return r;
1085 		r = ia_cachecode( c2.u[i].c3 );
1086 		if( r )
1087 		    return r;
1088 		r = ia_cachecode( c2.u[i].c4 );
1089 		if( r )
1090 		    return r;
1091 	    }
1092 	}
1093 	X86IDFN(idcache)( 2, c2.i );
1094 	DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
1095 	    c2.i[0], c2.i[1], c2.i[2], c2.i[3] );
1096     }
1097     return 0;
1098 }/* ia_cachesize */
1099 
1100 static int
ia_unifiedcache(void)1101 ia_unifiedcache(void) {
1102     ICPU4        c4;
1103     int          n;
1104     int          i;
1105     int          r, r2, r3;
1106     /* cache size information available */
1107 
1108     r2 = r3 = 0;
1109     for (i = 0; i <= 3; i++) {
1110 	__pgi_cpuid_ecx( 4, c4.i, i );
1111 	DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
1112 	    c4.i[0], c4.i[1], c4.i[2], c4.i[3] );
1113 	switch (c4.u.eax.cachetype) {
1114 	default:
1115 	    goto done;
1116 	case 1:
1117 	    /*
1118 	    printf("Data Cache\n");
1119 	    printf("+++ level %d\n", c4.u.eax.cachelevel);
1120 	    printf("+++ #bytes %d\n",
1121 		( (c4.u.ebx.assoc+1) *
1122 		  (c4.u.ebx.partitions+1) *
1123 		  (c4.u.ebx.linesize+1) *
1124 		  (c4.u.nsets+1) ) ;
1125 	    );
1126 	    */
1127 	    break;
1128 	case 2:
1129 	    /*
1130 	    printf("Instruction Cache\n");
1131 	    printf("+++ level %d\n", c4.u.eax.cachelevel);
1132 	    */
1133 	    break;
1134 	case 3:
1135 	    /*
1136 	    printf("Unified Cache\n");
1137 	    printf("+++ level %d\n", c4.u.eax.cachelevel);
1138 	    printf("+++ #bytes %d\n",
1139 		( (c4.u.ebx.assoc+1) *
1140 		  (c4.u.ebx.partitions+1) *
1141 		  (c4.u.ebx.linesize+1) *
1142 		  (c4.u.nsets+1) )
1143 	    );
1144 	    */
1145 	    r =  (c4.u.ebx.assoc+1) *
1146 		 (c4.u.ebx.partitions+1) *
1147 		 (c4.u.ebx.linesize+1) *
1148 		 (c4.u.nsets+1);
1149 	    if (c4.u.eax.cachelevel == 2)
1150 		r2 = r;
1151 	    else if (c4.u.eax.cachelevel == 3) {
1152 		r3 = r;
1153 	    }
1154 	    break;
1155 	}
1156     }
1157 done:
1158     if (r3)
1159 	return r3;
1160     return r2;
1161 }
1162 
1163 /*
1164  * return cache size for AMD processors
1165  */
1166 static int
amd_cachesize(void)1167 amd_cachesize(void)
1168 {
1169     CPU80 c80;
1170     ACPU86 c86;
1171 
1172     if( X86IDFN(idcache)( 0x80000000U, c80.i ) == 0 )
1173 	return 0;
1174     DEBUG_PRINTF("largest=%#8.8x", c80.b.largest );
1175     if( c80.b.largest < 0x80000006U )
1176 	return 0;
1177     if( X86IDFN(idcache)( 0x80000006U, c86.i ) == 0 )
1178 	return 0;
1179     if( c86.u.l3cache.size ) {
1180 	return c86.u.l3cache.size * 512 * 1024;
1181     }
1182     return c86.u.l2cache.size * 1024;
1183 }/* amd_cachesize */
1184 
1185 /*
1186  * test(cachesize)
1187  *  return intel or amd cache size
1188  */
1189 int
X86IDFN(get_cachesize)1190 X86IDFN(get_cachesize)(void)
1191 {
1192     if( X86IDFN(is_intel)() )
1193 	return ia_cachesize();
1194     if( X86IDFN(is_amd)() )
1195 	return amd_cachesize();
1196     return 0;
1197 }/* get_cachesize */
1198 
1199 /*
1200  * return cores for Intel processors
1201  */
1202 static int
ia_cores(void)1203 ia_cores(void)
1204 {
1205     CPU0 c0;
1206     ICPU4 c4;
1207     int i, n, r;
1208 
1209     if( X86IDFN(idcache)( 0, c0.i ) == 0 )
1210 	return 0;
1211     DEBUG_PRINTF("largest=%d", c0.b.largest );
1212 
1213     if( c0.b.largest < 4 )
1214 	return 0;
1215 
1216     __pgi_cpuid_ecx( 4, c4.i, 0 );
1217     DEBUG_PRINTF("eax %#8.8x ebx %#8.8x ecx %#8.8x edx %#8.8x",
1218 	c4.i[0], c4.i[1], c4.i[2], c4.i[3] );
1219     return c4.u.eax.ncores + 1;
1220 }/* ia_cores */
1221 
1222 /*
1223  * return cores for AMD processors
1224  */
1225 static int
amd_cores(void)1226 amd_cores(void)
1227 {
1228     CPU80 c80;
1229     ACPU88 c88;
1230 
1231     if( X86IDFN(idcache)( 0x80000000U, c80.i ) == 0 )
1232 	return 0;
1233     DEBUG_PRINTF("largest=%d", c80.b.largest );
1234     if( c80.b.largest < 0x80000008U )
1235 	return 0;
1236     if( X86IDFN(idcache)( 0x80000008U, c88.i ) == 0 )
1237 	return 0;
1238     return c88.u.ecx.cores + 1;
1239 }/* amd_cores */
1240 
1241 /*
1242  * test(cpuname)
1243  *  return processor name string
1244  */
1245 static char processor_name[50];
1246 char *
X86IDFN(get_processor_name)1247 X86IDFN(get_processor_name)(void)
1248 {
1249     CPU80 c80;
1250     int i;
1251     if( X86IDFN(idcache)( 0x80000000, c80.i ) == 0 )
1252 	return 0;
1253     DEBUG_PRINTF("eax %#8.8x", c80.i[0] );
1254     if( c80.b.largest < 0x80000004 ){
1255 	processor_name[0] = '\0';
1256 	return processor_name;	/* no processor name string */
1257     }
1258     if( X86IDFN(idcache)( 0x80000002, (unsigned int*)(processor_name+0) ) == 0 ){
1259 	processor_name[0] = '\0';
1260 	return processor_name;	/* no processor name string */
1261     }
1262     if( X86IDFN(idcache)( 0x80000003, (unsigned int*)(processor_name+16) ) == 0 ){
1263 	processor_name[0] = '\0';
1264 	return processor_name;	/* no processor name string */
1265     }
1266     if( X86IDFN(idcache)( 0x80000004, (unsigned int*)(processor_name+32) ) == 0 ){
1267 	processor_name[0] = '\0';
1268 	return processor_name;	/* no processor name string */
1269     }
1270     processor_name[48] = '\0';
1271     for( i = 0; i < 48; ++i ){
1272 	if( processor_name[i] != ' ' )
1273 	    return processor_name+i;
1274     }
1275     return processor_name;
1276 }/* get_processor_name */
1277