1 /*
2 * Copyright © 2010-2019 Inria. All rights reserved.
3 * Copyright © 2010-2013 Université Bordeaux
4 * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
5 * See COPYING in top-level directory.
6 *
7 *
8 * This backend is only used when the operating system does not export
9 * the necessary hardware topology information to user-space applications.
10 * Currently, only the FreeBSD backend relies on this x86 backend.
11 *
12 * Other backends such as Linux have their own way to retrieve various
13 * pieces of hardware topology information from the operating system
14 * on various architectures, without having to use this x86-specific code.
15 */
16
17 #include <private/autogen/config.h>
18 #include <hwloc.h>
19 #include <private/private.h>
20 #include <private/debug.h>
21 #include <private/misc.h>
22
23 #include <private/cpuid-x86.h>
24
25 #ifdef HAVE_VALGRIND_VALGRIND_H
26 #include <valgrind/valgrind.h>
27 #endif
28
29 struct hwloc_x86_backend_data_s {
30 unsigned nbprocs;
31 hwloc_bitmap_t apicid_set;
32 int apicid_unique;
33 int is_knl;
34 };
35
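/* features[] is indexed like the Linux kernel cpufeature words:
 * word 0 = leaf 0x1 EDX, word 4 = leaf 0x1 ECX (x2APIC is bit 21),
 * words 1/6 = leaf 0x80000001 EDX/ECX (TOPOEXT is ECX bit 22),
 * word 9 = leaf 0x7 EBX. See the filling code in hwloc_look_x86() below.
 */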
36 #define has_topoext(features) ((features)[6] & (1 << 22))
37 #define has_x2apic(features) ((features)[4] & (1 << 21))
38
39 struct cacheinfo {
40 unsigned type;
41 unsigned level;
42 unsigned nbthreads_sharing;
43 unsigned cacheid;
44
45 unsigned linesize;
46 unsigned linepart;
47 int inclusive;
48 int ways;
49 unsigned sets;
50 unsigned long size;
51 };
52
53 struct procinfo {
54 unsigned present;
55 unsigned apicid;
56 unsigned max_log_proc;
57 unsigned max_nbcores;
58 unsigned max_nbthreads;
59 unsigned packageid;
60 unsigned dieid;
61 unsigned nodeid;
62 unsigned unitid;
63 unsigned logprocid;
64 unsigned threadid;
65 unsigned coreid;
66 unsigned *otherids;
67 unsigned levels;
68 unsigned numcaches;
69 struct cacheinfo *cache;
70 char cpuvendor[13];
71 char cpumodel[3*4*4+1];
72 unsigned cpustepping;
73 unsigned cpumodelnumber;
74 unsigned cpufamilynumber;
75 };
76
77 enum cpuid_type {
78 intel,
79 amd,
80 zhaoxin,
81 hygon,
82 unknown
83 };
84
85 static void fill_amd_cache(struct procinfo *infos, unsigned level, int type, unsigned cpuid)
86 {
87 struct cacheinfo *cache, *tmpcaches;
88 unsigned cachenum;
89 unsigned long size = 0;
90
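/* Decode the cache size from the relevant cpuid output register
 * (AMD leaves 0x80000005/0x80000006): L1 size in KB in bits 31-24,
 * L2 size in KB in bits 31-16, L3 size in 512KB units in bits 31-18.
 * For instance (hypothetical value) ECX=0x40020140 would describe a
 * 64KB, 2-way L1D with 64-byte lines.
 */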
91 if (level == 1)
92 size = ((cpuid >> 24)) << 10;
93 else if (level == 2)
94 size = ((cpuid >> 16)) << 10;
95 else if (level == 3)
96 size = ((cpuid >> 18)) << 19;
97 if (!size)
98 return;
99
100 tmpcaches = realloc(infos->cache, (infos->numcaches+1)*sizeof(*infos->cache));
101 if (!tmpcaches)
102 /* failed to allocate, ignore that cache */
103 return;
104 infos->cache = tmpcaches;
105 cachenum = infos->numcaches++;
106
107 cache = &infos->cache[cachenum];
108
109 cache->type = type;
110 cache->level = level;
111 if (level <= 2)
112 cache->nbthreads_sharing = 1;
113 else
114 cache->nbthreads_sharing = infos->max_log_proc;
115 cache->linesize = cpuid & 0xff;
116 cache->linepart = 0;
117 cache->inclusive = 0; /* old AMD (K8-K10) is supposed to have exclusive caches */
118
119 if (level == 1) {
120 cache->ways = (cpuid >> 16) & 0xff;
121 if (cache->ways == 0xff)
122 /* Fully associative */
123 cache->ways = -1;
124 } else {
125 static const unsigned ways_tab[] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, -1 };
126 unsigned ways = (cpuid >> 12) & 0xf;
127 cache->ways = ways_tab[ways];
128 }
129 cache->size = size;
130 cache->sets = 0;
131
132 hwloc_debug("cache L%u t%u linesize %u ways %d size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
133 }
134
135 static void look_exttopoenum(struct procinfo *infos, unsigned leaf)
136 {
137 unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
138 unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */
139 unsigned eax, ebx, ecx = 0, edx;
140 int apic_packageshift = 0;
141
142 for (level = 0; ; level++) {
143 ecx = level;
144 eax = leaf;
145 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
146 if (!eax && !ebx)
147 break;
148 apic_packageshift = eax & 0x1f;
149 }
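/* The first pass above only records the last (highest) level's shift,
 * i.e. the package shift, so that per-level ids below can be masked
 * relative to the package.
 */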
150
151 if (level) {
152 infos->otherids = malloc(level * sizeof(*infos->otherids));
153 if (infos->otherids) {
154 infos->levels = level;
155 for (level = 0; ; level++) {
156 ecx = level;
157 eax = leaf;
158 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
159 if (!eax && !ebx)
160 break;
161 apic_nextshift = eax & 0x1f;
162 apic_number = ebx & 0xffff;
163 apic_type = (ecx & 0xff00) >> 8;
164 apic_id = edx;
165 id = (apic_id >> apic_shift) & ((1 << (apic_packageshift - apic_shift))
166 - 1);
167 hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
168 infos->apicid = apic_id;
169 infos->otherids[level] = UINT_MAX;
170 switch (apic_type) {
171 case 1:
172 infos->threadid = id;
173 /* apic_number is the actual number of threads per core */
174 break;
175 case 2:
176 infos->coreid = id;
177 /* apic_number is the actual number of threads per module */
178 break;
179 case 5:
180 infos->dieid = id;
181 /* apic_number is the actual number of threads per package */
182 break;
183 default:
184 hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
185 infos->otherids[level] = apic_id >> apic_shift;
186 break;
187 }
188 apic_shift = apic_nextshift;
189 }
190 infos->apicid = apic_id;
191 infos->packageid = apic_id >> apic_shift;
192 hwloc_debug("x2APIC remainder: %u\n", infos->packageid);
193 hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
194 }
195 }
196 }
197
198 /* Fetch information from the processor itself using cpuid, and store it in
199 * infos for summarize() to analyze globally */
200 static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type)
201 {
202 struct hwloc_x86_backend_data_s *data = backend->private_data;
203 unsigned eax, ebx, ecx = 0, edx;
204 unsigned cachenum;
205 struct cacheinfo *cache;
206 unsigned regs[4];
207 unsigned _model, _extendedmodel, _family, _extendedfamily;
208
209 infos->present = 1;
210
211 /* on return from this function, the following fields must be set in infos:
212 * packageid, nodeid, unitid, coreid, threadid, or -1
213 * apicid
214 * levels and levels slots in otherids[]
215 * numcaches and numcaches slots in caches[]
216 *
217 * max_log_proc, max_nbthreads, max_nbcores, logprocid
218 * are only used temporarily inside this function and its callees.
219 */
220
221 /* Get apicid, max_log_proc, packageid, logprocid from cpuid 0x01 */
222 eax = 0x01;
223 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
224 infos->apicid = ebx >> 24;
225 if (edx & (1 << 28))
226 infos->max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1);
227 else
228 infos->max_log_proc = 1;
229 hwloc_debug("APIC ID 0x%02x max_log_proc %u\n", infos->apicid, infos->max_log_proc);
230 infos->packageid = infos->apicid / infos->max_log_proc;
231 infos->logprocid = infos->apicid % infos->max_log_proc;
232 hwloc_debug("phys %u thread %u\n", infos->packageid, infos->logprocid);
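/* For instance (hypothetical values), with max_log_proc=8 an APIC id
 * of 0x1a would give packageid=3 and logprocid=2.
 */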
233
234 /* Get cpu model/family/stepping numbers from same cpuid */
235 _model = (eax>>4) & 0xf;
236 _extendedmodel = (eax>>16) & 0xf;
237 _family = (eax>>8) & 0xf;
238 _extendedfamily = (eax>>20) & 0xff;
239 if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == hygon) && _family == 0xf) {
240 infos->cpufamilynumber = _family + _extendedfamily;
241 } else {
242 infos->cpufamilynumber = _family;
243 }
244 if ((cpuid_type == intel && (_family == 0x6 || _family == 0xf))
245 || ((cpuid_type == amd || cpuid_type == hygon) && _family == 0xf)
246 || (cpuid_type == zhaoxin && (_family == 0x6 || _family == 0x7))) {
247 infos->cpumodelnumber = _model + (_extendedmodel << 4);
248 } else {
249 infos->cpumodelnumber = _model;
250 }
251 infos->cpustepping = eax & 0xf;
252
253 if (cpuid_type == intel && infos->cpufamilynumber == 0x6 &&
254 (infos->cpumodelnumber == 0x57 || infos->cpumodelnumber == 0x85))
255 data->is_knl = 1; /* KNM is the same as KNL */
256
257 /* Get cpu vendor string from cpuid 0x00 */
258 memset(regs, 0, sizeof(regs));
259 regs[0] = 0;
260 hwloc_x86_cpuid(&regs[0], &regs[1], &regs[3], &regs[2]);
261 memcpy(infos->cpuvendor, regs+1, 4*3);
262 /* infos was calloc'ed, already ends with \0 */
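/* Note: leaf 0x0 returns the vendor string in EBX,EDX,ECX order;
 * swapping the last two arguments above stores the registers into
 * regs[1..3] in readable order, e.g. "GenuineIntel".
 */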
263
264 /* Get cpu model string from cpuid 0x80000002-4 */
265 if (highest_ext_cpuid >= 0x80000004) {
266 memset(regs, 0, sizeof(regs));
267 regs[0] = 0x80000002;
268 hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
269 memcpy(infos->cpumodel, regs, 4*4);
270 regs[0] = 0x80000003;
271 hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
272 memcpy(infos->cpumodel + 4*4, regs, 4*4);
273 regs[0] = 0x80000004;
274 hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
275 memcpy(infos->cpumodel + 4*4*2, regs, 4*4);
276 /* infos was calloc'ed, already ends with \0 */
277 }
278
279 /* Get core/thread information from cpuid 0x80000008
280 * (not supported on Intel)
281 */
282 if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008) {
283 unsigned coreidsize;
284 eax = 0x80000008;
285 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
286 coreidsize = (ecx >> 12) & 0xf;
287 hwloc_debug("core ID size: %u\n", coreidsize);
288 if (!coreidsize) {
289 infos->max_nbcores = (ecx & 0xff) + 1;
290 } else
291 infos->max_nbcores = 1 << coreidsize;
292 hwloc_debug("Thus max # of cores: %u\n", infos->max_nbcores);
293 /* Still no multithreaded AMD */
294 infos->max_nbthreads = 1;
295 hwloc_debug("and max # of threads: %u\n", infos->max_nbthreads);
296 /* The legacy max_log_proc is deprecated; it can be smaller than max_nbcores,
297 * which is the maximum number of cores that the processor could theoretically support
298 * (see "Multiple Core Calculation" in the AMD CPUID specification).
299 * Recompute packageid/logprocid/threadid/coreid accordingly.
300 */
301 infos->packageid = infos->apicid / infos->max_nbcores;
302 infos->logprocid = infos->apicid % infos->max_nbcores;
303 infos->threadid = infos->logprocid % infos->max_nbthreads;
304 infos->coreid = infos->logprocid / infos->max_nbthreads;
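/* Since max_nbthreads is 1 here, threadid is always 0 and coreid
 * equals logprocid. */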
305 hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
306 }
307
308 infos->numcaches = 0;
309 infos->cache = NULL;
310
311 /* Get apicid, nodeid, unitid from cpuid 0x8000001e
312 * and cache information from cpuid 0x8000001d
313 * (AMD topology extension)
314 */
315 if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) {
316 unsigned apic_id, node_id, nodes_per_proc;
317
318 /* the code below expects no other caches yet */
319 assert(!infos->numcaches);
320
321 eax = 0x8000001e;
322 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
323 infos->apicid = apic_id = eax;
324
325 if (infos->cpufamilynumber == 0x16) {
326 /* ecx is reserved */
327 node_id = 0;
328 nodes_per_proc = 1;
329 } else {
330 /* AMD other families or Hygon family 18h */
331 node_id = ecx & 0xff;
332 nodes_per_proc = ((ecx >> 8) & 7) + 1;
333 }
334 infos->nodeid = node_id;
335 if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
336 || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) {
337 hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
338 }
339
340 if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */
341 unsigned unit_id, cores_per_unit;
342 infos->unitid = unit_id = ebx & 0xff;
343 cores_per_unit = ((ebx >> 8) & 0xff) + 1;
344 hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id);
345 } else {
346 unsigned core_id, threads_per_core;
347 infos->coreid = core_id = ebx & 0xff;
348 threads_per_core = ((ebx >> 8) & 0xff) + 1;
349 hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id);
350 }
351
352 for (cachenum = 0; ; cachenum++) {
353 unsigned type;
354 eax = 0x8000001d;
355 ecx = cachenum;
356 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
357 type = eax & 0x1f;
358 if (type == 0)
359 break;
360 infos->numcaches++;
361 }
362
363 cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
364 if (cache) {
365 for (cachenum = 0; ; cachenum++) {
366 unsigned long linesize, linepart, ways, sets;
367 unsigned type;
368 eax = 0x8000001d;
369 ecx = cachenum;
370 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
371
372 type = eax & 0x1f;
373
374 if (type == 0)
375 break;
376
377 cache->type = type;
378 cache->level = (eax >> 5) & 0x7;
379 /* Note: actually number of cores */
380 cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
381
382 cache->linesize = linesize = (ebx & 0xfff) + 1;
383 cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
384 ways = ((ebx >> 22) & 0x3ff) + 1;
385
386 if (eax & (1 << 9))
387 /* Fully associative */
388 cache->ways = -1;
389 else
390 cache->ways = ways;
391 cache->sets = sets = ecx + 1;
392 cache->size = linesize * linepart * ways * sets;
393 cache->inclusive = edx & 0x2;
394
395 hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
396
397 cache++;
398 }
399 } else {
400 infos->numcaches = 0;
401 }
402 } else {
403 /* If there's no topoext,
404 * get cache information from cpuid 0x80000005 and 0x80000006
405 * (not supported on Intel)
406 */
407 if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000005) {
408 eax = 0x80000005;
409 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
410 fill_amd_cache(infos, 1, 1, ecx); /* L1d */
411 fill_amd_cache(infos, 1, 2, edx); /* L1i */
412 }
413 if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) {
414 eax = 0x80000006;
415 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
416 if (ecx & 0xf000)
417 /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11.
418 * Could be useful if some Intels (at least before Core micro-architecture)
419 * support this leaf without leaf 0x4.
420 */
421 fill_amd_cache(infos, 2, 3, ecx); /* L2u */
422 if (edx & 0xf000)
423 fill_amd_cache(infos, 3, 3, edx); /* L3u */
424 }
425 }
426
427 /* Get thread/core + cache information from cpuid 0x04
428 * (not supported on AMD)
429 */
430 if ((cpuid_type != amd && cpuid_type != hygon) && highest_cpuid >= 0x04) {
431 unsigned level;
432 struct cacheinfo *tmpcaches;
433 unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */
434
435 for (cachenum = 0; ; cachenum++) {
436 unsigned type;
437 eax = 0x04;
438 ecx = cachenum;
439 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
440
441 type = eax & 0x1f;
442
443 hwloc_debug("cache %u type %u\n", cachenum, type);
444
445 if (type == 0)
446 break;
447 level = (eax >> 5) & 0x7;
448 if (data->is_knl && level == 3)
449 /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
450 break;
451 infos->numcaches++;
452
453 if (!cachenum) {
454 /* by the way, get thread/core information from the first cache */
455 infos->max_nbcores = ((eax >> 26) & 0x3f) + 1;
456 infos->max_nbthreads = infos->max_log_proc / infos->max_nbcores;
457 hwloc_debug("thus %u threads\n", infos->max_nbthreads);
458 infos->threadid = infos->logprocid % infos->max_nbthreads;
459 infos->coreid = infos->logprocid / infos->max_nbthreads;
460 hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
461 }
462 }
463
464 tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache));
465 if (!tmpcaches) {
466 infos->numcaches = oldnumcaches;
467 } else {
468 infos->cache = tmpcaches;
469 cache = &infos->cache[oldnumcaches];
470
471 for (cachenum = 0; ; cachenum++) {
472 unsigned long linesize, linepart, ways, sets;
473 unsigned type;
474 eax = 0x04;
475 ecx = cachenum;
476 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
477
478 type = eax & 0x1f;
479
480 if (type == 0)
481 break;
482 level = (eax >> 5) & 0x7;
483 if (data->is_knl && level == 3)
484 /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
485 break;
486
487 cache->type = type;
488 cache->level = level;
489 cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
490
491 cache->linesize = linesize = (ebx & 0xfff) + 1;
492 cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
493 ways = ((ebx >> 22) & 0x3ff) + 1;
494 if (eax & (1 << 9))
495 /* Fully associative */
496 cache->ways = -1;
497 else
498 cache->ways = ways;
499 cache->sets = sets = ecx + 1;
500 cache->size = linesize * linepart * ways * sets;
501 cache->inclusive = edx & 0x2;
502
503 hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
504
505 cache++;
506 }
507 }
508 }
509
510 if ((cpuid_type == intel) && highest_cpuid >= 0x1f) {
511 /* Get package/die/module/tile/core/thread information from cpuid 0x1f
512 * (Intel v2 Extended Topology Enumeration)
513 */
514 look_exttopoenum(infos, 0x1f);
515
516 } else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x0b && has_x2apic(features)) {
517 /* Get package/core/thread information from cpuid 0x0b
518 * (Intel v1 Extended Topology Enumeration)
519 */
520 look_exttopoenum(infos, 0x0b);
521 }
522
523 /* Now that we have all info, compute cacheids and apply quirks */
524 for (cachenum = 0; cachenum < infos->numcaches; cachenum++) {
525 cache = &infos->cache[cachenum];
526
527 /* default cacheid value */
528 cache->cacheid = infos->apicid / cache->nbthreads_sharing;
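/* This works when nbthreads_sharing is a power of 2 (usually the case,
 * since cpuid reports the maximum number of addressable ids); the
 * vendor quirks below handle known non-power-of-2 exceptions.
 */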
529
530 if (cpuid_type == amd) {
531 /* AMD quirks */
532 if (infos->cpufamilynumber == 0x17
533 && cache->level == 3 && cache->nbthreads_sharing == 6) {
534 /* AMD family 0x17 always shares L3 between 8 APIC ids,
535 * even when only 6 APIC ids are enabled and reported in nbthreads_sharing
536 * (on 24-core CPUs).
537 */
538 cache->cacheid = infos->apicid / 8;
539
540 } else if (infos->cpufamilynumber == 0x10 && infos->cpumodelnumber == 0x9
541 && cache->level == 3
542 && (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) {
543 /* Fix AMD family 0x10 model 0x9 (Magny-Cours) with 8 or 12 cores.
544 * The L3 (and its associativity) is actually split into two halves.
545 */
546 if (cache->nbthreads_sharing == 16)
547 cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */
548 cache->nbthreads_sharing /= 2;
549 cache->size /= 2;
550 if (cache->ways != -1)
551 cache->ways /= 2;
552 /* The AMD Magny-Cours 12-core processor reserves APIC ids as AAAAAABBBBBB....
553 * among the first L3 (A), the second L3 (B), and nonexistent cores (.).
554 * On multi-socket servers, L3 in non-first sockets may have APIC id ranges
555 * such as [16-21] that are not aligned on multiple of nbthreads_sharing (6).
556 * That means, we can't just compare apicid/nbthreads_sharing to identify siblings.
557 */
558 cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
559 + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */
560
561 } else if (infos->cpufamilynumber == 0x15
562 && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
563 && cache->level == 3 && cache->nbthreads_sharing == 6) {
564 /* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours above,
565 * but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here.
566 */
567 cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
568 + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */
569 }
570 } else if (cpuid_type == hygon) {
571 if (infos->cpufamilynumber == 0x18
572 && cache->level == 3 && cache->nbthreads_sharing == 6) {
573 /* Hygon family 0x18 always shares L3 between 8 APIC ids,
574 * even when only 6 APIC ids are enabled and reported in nbthreads_sharing
575 * (on 24-core CPUs).
576 */
577 cache->cacheid = infos->apicid / 8;
578 }
579 }
580 }
581
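/* Seeing the same APIC id twice means binding failed somewhere and the
 * cpuid info is unreliable; mark ids as non-unique so that look_procs()
 * skips summarize().
 */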
582 if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
583 data->apicid_unique = 0;
584 else
585 hwloc_bitmap_set(data->apicid_set, infos->apicid);
586 }
587
588 static void
589 hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int nodup)
590 {
591 char number[8];
592 hwloc_obj_add_info_nodup(obj, "CPUVendor", info->cpuvendor, nodup);
593 snprintf(number, sizeof(number), "%u", info->cpufamilynumber);
594 hwloc_obj_add_info_nodup(obj, "CPUFamilyNumber", number, nodup);
595 snprintf(number, sizeof(number), "%u", info->cpumodelnumber);
596 hwloc_obj_add_info_nodup(obj, "CPUModelNumber", number, nodup);
597 if (info->cpumodel[0]) {
598 const char *c = info->cpumodel;
599 while (*c == ' ')
600 c++;
601 hwloc_obj_add_info_nodup(obj, "CPUModel", c, nodup);
602 }
603 snprintf(number, sizeof(number), "%u", info->cpustepping);
604 hwloc_obj_add_info_nodup(obj, "CPUStepping", number, nodup);
605 }
606
607 /* Analyse information stored in infos, and build/annotate topology levels accordingly */
608 static int summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery)
609 {
610 struct hwloc_topology *topology = backend->topology;
611 struct hwloc_x86_backend_data_s *data = backend->private_data;
612 unsigned nbprocs = data->nbprocs;
613 hwloc_bitmap_t complete_cpuset = hwloc_bitmap_alloc();
614 unsigned i, j, l, level, type;
615 unsigned nbpackages = 0;
616 int one = -1;
617 unsigned next_group_depth = topology->next_group_depth;
618 int caches_added = 0;
619 hwloc_bitmap_t remaining_cpuset;
620
621 for (i = 0; i < nbprocs; i++)
622 if (infos[i].present) {
623 hwloc_bitmap_set(complete_cpuset, i);
624 one = i;
625 }
626
627 if (one == -1) {
628 hwloc_bitmap_free(complete_cpuset);
629 return 0;
630 }
631
632 remaining_cpuset = hwloc_bitmap_alloc();
633
634 /* Ideally, when fulldiscovery=0, we could add any object that doesn't exist yet.
635 * But what if the x86 and the native backends disagree because one is buggy? Which one to trust?
636 * Only annotate existing objects for now.
637 */
638
639 /* Look for packages */
640 if (fulldiscovery) {
641 hwloc_bitmap_t package_cpuset;
642 hwloc_obj_t package;
643
644 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
645 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
646 unsigned packageid = infos[i].packageid;
647
648 package_cpuset = hwloc_bitmap_alloc();
649 for (j = i; j < nbprocs; j++) {
650 if (infos[j].packageid == packageid) {
651 hwloc_bitmap_set(package_cpuset, j);
652 hwloc_bitmap_clr(remaining_cpuset, j);
653 }
654 }
655 package = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, packageid);
656 package->cpuset = package_cpuset;
657
658 hwloc_x86_add_cpuinfos(package, &infos[i], 0);
659
660 hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n",
661 packageid, package_cpuset);
662 hwloc_insert_object_by_cpuset(topology, package);
663 nbpackages++;
664 }
665
666 } else {
667 /* Annotate previously-existing packages */
668 hwloc_obj_t package = NULL;
669 int same = 1;
670 nbpackages = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
671 /* check whether all packages have the same info */
672 for(i=1; i<nbprocs; i++) {
673 if (strcmp(infos[i].cpumodel, infos[0].cpumodel)) {
674 same = 0;
675 break;
676 }
677 }
678 /* now iterate over packages and annotate them */
679 while ((package = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PACKAGE, package)) != NULL) {
680 if (package->os_index == (unsigned) -1) {
681 /* try to fix the package OS index if unknown.
682 * FIXME: ideally, we should check all bits in case x86 and the native backend disagree.
683 */
684 for(i=0; i<nbprocs; i++) {
685 if (hwloc_bitmap_isset(package->cpuset, i)) {
686 package->os_index = infos[i].packageid;
687 break;
688 }
689 }
690 }
691 for(i=0; i<nbprocs; i++) {
692 /* if there's a single package, it's the one we want.
693 * if the index is ok, it's the one we want.
694 * if the index is unknown but all packages have the same id, that's fine
695 */
696 if (nbpackages == 1 || infos[i].packageid == package->os_index || (same && package->os_index == (unsigned) -1)) {
697 hwloc_x86_add_cpuinfos(package, &infos[i], 1);
698 break;
699 }
700 }
701 }
702 }
703 /* If there was no package, annotate the Machine instead */
704 if ((!nbpackages) && infos[0].cpumodel[0]) {
705 hwloc_x86_add_cpuinfos(hwloc_get_root_obj(topology), &infos[0], 1);
706 }
707
708 /* Look for NUMA nodes inside packages */
709 if (fulldiscovery && getenv("HWLOC_X86_TOPOEXT_NUMANODES")) {
710 hwloc_bitmap_t node_cpuset;
711 hwloc_obj_t node;
712
713 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
714 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
715 unsigned packageid = infos[i].packageid;
716 unsigned nodeid = infos[i].nodeid;
717
718 if (nodeid == (unsigned)-1) {
719 hwloc_bitmap_clr(remaining_cpuset, i);
720 continue;
721 }
722
723 node_cpuset = hwloc_bitmap_alloc();
724 for (j = i; j < nbprocs; j++) {
725 if (infos[j].nodeid == (unsigned) -1) {
726 hwloc_bitmap_clr(remaining_cpuset, j);
727 continue;
728 }
729
730 if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) {
731 hwloc_bitmap_set(node_cpuset, j);
732 hwloc_bitmap_clr(remaining_cpuset, j);
733 }
734 }
735 node = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, nodeid);
736 node->cpuset = node_cpuset;
737 node->nodeset = hwloc_bitmap_alloc();
738 hwloc_bitmap_set(node->nodeset, nodeid);
739 hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
740 nodeid, node_cpuset);
741 hwloc_insert_object_by_cpuset(topology, node);
742 }
743 }
744
745 if (fulldiscovery) {
746 hwloc_bitmap_t unit_cpuset, die_cpuset;
747 hwloc_obj_t unit, die;
748 char *env;
749 int dont_merge;
750
751 /* Look for Compute units inside packages */
752 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
753 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
754 unsigned packageid = infos[i].packageid;
755 unsigned unitid = infos[i].unitid;
756
757 if (unitid == (unsigned)-1) {
758 hwloc_bitmap_clr(remaining_cpuset, i);
759 continue;
760 }
761
762 unit_cpuset = hwloc_bitmap_alloc();
763 for (j = i; j < nbprocs; j++) {
764 if (infos[j].unitid == (unsigned) -1) {
765 hwloc_bitmap_clr(remaining_cpuset, j);
766 continue;
767 }
768
769 if (infos[j].packageid == packageid && infos[j].unitid == unitid) {
770 hwloc_bitmap_set(unit_cpuset, j);
771 hwloc_bitmap_clr(remaining_cpuset, j);
772 }
773 }
774 unit = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unitid);
775 unit->cpuset = unit_cpuset;
776 hwloc_obj_add_info(unit, "Type", "ComputeUnit");
777 hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n",
778 unitid, unit_cpuset);
779 hwloc_insert_object_by_cpuset(topology, unit);
780 }
781
782 /* Look for Dies inside packages */
783 env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS");
784 dont_merge = env && atoi(env);
785 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
786 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
787 unsigned packageid = infos[i].packageid;
788 unsigned dieid = infos[i].dieid;
789
790 if (dieid == (unsigned)-1) {
791 hwloc_bitmap_clr(remaining_cpuset, i);
792 continue;
793 }
794
795 die_cpuset = hwloc_bitmap_alloc();
796 for (j = i; j < nbprocs; j++) {
797 if (infos[j].dieid == (unsigned) -1) {
798 hwloc_bitmap_clr(remaining_cpuset, j);
799 continue;
800 }
801
802 if (infos[j].packageid == packageid && infos[j].dieid == dieid) {
803 hwloc_bitmap_set(die_cpuset, j);
804 hwloc_bitmap_clr(remaining_cpuset, j);
805 }
806 }
807 die = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, dieid);
808 die->cpuset = die_cpuset;
809 hwloc_obj_add_info(die, "Type", "Die");
810 die->attr->group.dont_merge = dont_merge;
811 hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n",
812 dieid, die_cpuset);
813 hwloc_insert_object_by_cpuset(topology, die);
814 }
815
816 /* Look for unknown objects */
817 if (infos[one].otherids) {
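/* Walk levels from highest to lowest; level is unsigned, so the loop
 * ends when it wraps around after reaching 0. */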
818 for (level = infos[one].levels-1; level <= infos[one].levels-1; level--) {
819 if (infos[one].otherids[level] != UINT_MAX) {
820 hwloc_bitmap_t unknown_cpuset;
821 hwloc_obj_t unknown_obj;
822
823 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
824 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
825 unsigned unknownid = infos[i].otherids[level];
826
827 unknown_cpuset = hwloc_bitmap_alloc();
828 for (j = i; j < nbprocs; j++) {
829 if (infos[j].otherids[level] == unknownid) {
830 hwloc_bitmap_set(unknown_cpuset, j);
831 hwloc_bitmap_clr(remaining_cpuset, j);
832 }
833 }
834 unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unknownid);
835 unknown_obj->cpuset = unknown_cpuset;
836 unknown_obj->os_level = level;
837 unknown_obj->attr->group.depth = topology->next_group_depth + level;
838 if (next_group_depth <= topology->next_group_depth + level)
839 next_group_depth = topology->next_group_depth + level + 1;
840 hwloc_debug_2args_bitmap("os unknown%u %u has cpuset %s\n",
841 level, unknownid, unknown_cpuset);
842 hwloc_insert_object_by_cpuset(topology, unknown_obj);
843 }
844 }
845 }
846 }
847 }
848
849 /* Look for cores */
850 if (fulldiscovery) {
851 hwloc_bitmap_t core_cpuset;
852 hwloc_obj_t core;
853
854 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
855 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
856 unsigned packageid = infos[i].packageid;
857 unsigned nodeid = infos[i].nodeid;
858 unsigned coreid = infos[i].coreid;
859
860 if (coreid == (unsigned) -1) {
861 hwloc_bitmap_clr(remaining_cpuset, i);
862 continue;
863 }
864
865 core_cpuset = hwloc_bitmap_alloc();
866 for (j = i; j < nbprocs; j++) {
867 if (infos[j].coreid == (unsigned) -1) {
868 hwloc_bitmap_clr(remaining_cpuset, j);
869 continue;
870 }
871
872 if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) {
873 hwloc_bitmap_set(core_cpuset, j);
874 hwloc_bitmap_clr(remaining_cpuset, j);
875 }
876 }
877 core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, coreid);
878 core->cpuset = core_cpuset;
879 hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
880 coreid, core_cpuset);
881 hwloc_insert_object_by_cpuset(topology, core);
882 }
883 }
884
885 /* Look for PUs */
886 if (fulldiscovery) {
887 hwloc_debug("%s", "\n\n * CPU cpusets *\n\n");
888 for (i=0; i<nbprocs; i++)
889 if(infos[i].present) { /* Only add present PU. We don't know if others actually exist */
890 struct hwloc_obj *obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, i);
891 obj->cpuset = hwloc_bitmap_alloc();
892 hwloc_bitmap_only(obj->cpuset, i);
893 hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset);
894 hwloc_insert_object_by_cpuset(topology, obj);
895 }
896 }
897
898 /* Look for caches */
899 /* First find max level */
900 level = 0;
901 for (i = 0; i < nbprocs; i++)
902 for (j = 0; j < infos[i].numcaches; j++)
903 if (infos[i].cache[j].level > level)
904 level = infos[i].cache[j].level;
905 while (level > 0) {
906 for (type = 1; type <= 3; type++) {
907 /* Look for caches of that type at level level */
908 {
909 hwloc_obj_t cache;
910
911 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
912 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
913 hwloc_bitmap_t puset;
914 int depth;
915
916 for (l = 0; l < infos[i].numcaches; l++) {
917 if (infos[i].cache[l].level == level && infos[i].cache[l].type == type)
918 break;
919 }
920 if (l == infos[i].numcaches) {
921 /* no L<level> cache of that type in i */
922 hwloc_bitmap_clr(remaining_cpuset, i);
923 continue;
924 }
925
926 puset = hwloc_bitmap_alloc();
927 hwloc_bitmap_set(puset, i);
928 depth = hwloc_get_cache_type_depth(topology, level,
929 type == 1 ? HWLOC_OBJ_CACHE_DATA : type == 2 ? HWLOC_OBJ_CACHE_INSTRUCTION : HWLOC_OBJ_CACHE_UNIFIED);
930 if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
931 cache = hwloc_get_next_obj_covering_cpuset_by_depth(topology, puset, depth, NULL);
932 else
933 cache = NULL;
934 hwloc_bitmap_free(puset);
935
936 if (cache) {
937 /* Found cache above that PU, annotate if no such attribute yet */
938 if (!hwloc_obj_get_info_by_name(cache, "Inclusive"))
939 hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
940 hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, cache->cpuset);
941 } else {
942 /* Add the missing cache */
943 hwloc_bitmap_t cache_cpuset;
944 unsigned packageid = infos[i].packageid;
945 unsigned cacheid = infos[i].cache[l].cacheid;
946 /* Now look for others sharing it */
947 cache_cpuset = hwloc_bitmap_alloc();
948 for (j = i; j < nbprocs; j++) {
949 unsigned l2;
950 for (l2 = 0; l2 < infos[j].numcaches; l2++) {
951 if (infos[j].cache[l2].level == level && infos[j].cache[l2].type == type)
952 break;
953 }
954 if (l2 == infos[j].numcaches) {
955 /* no L<level> cache of that type in j */
956 hwloc_bitmap_clr(remaining_cpuset, j);
957 continue;
958 }
959 if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) {
960 hwloc_bitmap_set(cache_cpuset, j);
961 hwloc_bitmap_clr(remaining_cpuset, j);
962 }
963 }
964 cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, cacheid);
965 cache->attr->cache.depth = level;
966 cache->attr->cache.size = infos[i].cache[l].size;
967 cache->attr->cache.linesize = infos[i].cache[l].linesize;
968 cache->attr->cache.associativity = infos[i].cache[l].ways;
969 switch (infos[i].cache[l].type) {
970 case 1:
971 cache->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
972 break;
973 case 2:
974 cache->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
975 break;
976 case 3:
977 cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
978 break;
979 }
980 cache->cpuset = cache_cpuset;
981 hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
982 hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n",
983 level, cacheid, cache_cpuset);
984 hwloc_insert_object_by_cpuset(topology, cache);
985 caches_added++;
986 }
987 }
988 }
989 }
990 level--;
991 }
992
993 hwloc_bitmap_free(remaining_cpuset);
994 hwloc_bitmap_free(complete_cpuset);
995 topology->next_group_depth = next_group_depth;
996
997 return fulldiscovery || caches_added;
998 }
999
1000 static int
1001 look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery,
1002 unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type,
1003 int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags),
1004 int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags))
1005 {
1006 struct hwloc_x86_backend_data_s *data = backend->private_data;
1007 struct hwloc_topology *topology = backend->topology;
1008 unsigned nbprocs = data->nbprocs;
1009 hwloc_bitmap_t orig_cpuset = hwloc_bitmap_alloc();
1010 hwloc_bitmap_t set;
1011 unsigned i;
1012 int ret = 0;
1013
1014 if (get_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) {
1015 hwloc_bitmap_free(orig_cpuset);
1016 return -1;
1017 }
1018
1019 set = hwloc_bitmap_alloc();
1020
1021 for (i = 0; i < nbprocs; i++) {
1022 hwloc_bitmap_only(set, i);
1023 hwloc_debug("binding to CPU%u\n", i);
1024 if (set_cpubind(topology, set, HWLOC_CPUBIND_STRICT)) {
1025 hwloc_debug("could not bind to CPU%u: %s\n", i, strerror(errno));
1026 continue;
1027 }
1028 look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type);
1029 }
1030
1031 set_cpubind(topology, orig_cpuset, 0);
1032 hwloc_bitmap_free(set);
1033 hwloc_bitmap_free(orig_cpuset);
1034
1035 if (!data->apicid_unique)
1036 fulldiscovery = 0;
1037 else
1038 ret = summarize(backend, infos, fulldiscovery);
1039 return ret;
1040 }
1041
1042 #if defined HWLOC_FREEBSD_SYS && defined HAVE_CPUSET_SETID
1043 #include <sys/param.h>
1044 #include <sys/cpuset.h>
1045 typedef cpusetid_t hwloc_x86_os_state_t;
1046 static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state)
1047 {
1048 /* temporarily make all CPUs available during discovery */
1049 cpuset_getid(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, state);
1050 cpuset_setid(CPU_WHICH_PID, -1, 0);
1051 }
1052 static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state)
1053 {
1054 /* restore initial cpuset */
1055 cpuset_setid(CPU_WHICH_PID, -1, *state);
1056 }
1057 #else /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
1058 typedef void * hwloc_x86_os_state_t;
1059 static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state __hwloc_attribute_unused) { }
1060 static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state __hwloc_attribute_unused) { }
1061 #endif /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
1062
1063
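/* Intel "GenuineIntel" */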
1064 #define INTEL_EBX ('G' | ('e'<<8) | ('n'<<16) | ('u'<<24))
1065 #define INTEL_EDX ('i' | ('n'<<8) | ('e'<<16) | ('I'<<24))
1066 #define INTEL_ECX ('n' | ('t'<<8) | ('e'<<16) | ('l'<<24))
1067
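/* AMD "AuthenticAMD" */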
1068 #define AMD_EBX ('A' | ('u'<<8) | ('t'<<16) | ('h'<<24))
1069 #define AMD_EDX ('e' | ('n'<<8) | ('t'<<16) | ('i'<<24))
1070 #define AMD_ECX ('c' | ('A'<<8) | ('M'<<16) | ('D'<<24))
1071
1072 /* HYGON "HygonGenuine" */
1073 #define HYGON_EBX ('H' | ('y'<<8) | ('g'<<16) | ('o'<<24))
1074 #define HYGON_EDX ('n' | ('G'<<8) | ('e'<<16) | ('n'<<24))
1075 #define HYGON_ECX ('u' | ('i'<<8) | ('n'<<16) | ('e'<<24))
1076
1077 /* (Zhaoxin) CentaurHauls */
1078 #define ZX_EBX ('C' | ('e'<<8) | ('n'<<16) | ('t'<<24))
1079 #define ZX_EDX ('a' | ('u'<<8) | ('r'<<16) | ('H'<<24))
1080 #define ZX_ECX ('a' | ('u'<<8) | ('l'<<16) | ('s'<<24))
1081
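/* (Zhaoxin) "  Shanghai  " */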
1082 #define SH_EBX (' ' | (' '<<8) | ('S'<<16) | ('h'<<24))
1083 #define SH_EDX ('a' | ('n'<<8) | ('g'<<16) | ('h'<<24))
1084 #define SH_ECX ('a' | ('i'<<8) | (' '<<16) | (' '<<24))
1085
1086 /* fake cpubind for when nbprocs=1 and no binding support */
1087 static int fake_get_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
1088 hwloc_cpuset_t set __hwloc_attribute_unused,
1089 int flags __hwloc_attribute_unused)
1090 {
1091 return 0;
1092 }
1093 static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
1094 hwloc_const_cpuset_t set __hwloc_attribute_unused,
1095 int flags __hwloc_attribute_unused)
1096 {
1097 return 0;
1098 }
1099
1100 static
1101 int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
1102 {
1103 struct hwloc_x86_backend_data_s *data = backend->private_data;
1104 unsigned nbprocs = data->nbprocs;
1105 unsigned eax, ebx, ecx = 0, edx;
1106 unsigned i;
1107 unsigned highest_cpuid;
1108 unsigned highest_ext_cpuid;
1109 /* This stores cpuid features with the same indexing as Linux */
1110 unsigned features[10] = { 0 };
1111 struct procinfo *infos = NULL;
1112 enum cpuid_type cpuid_type = unknown;
1113 hwloc_x86_os_state_t os_state;
1114 struct hwloc_binding_hooks hooks;
1115 struct hwloc_topology_support support;
1116 struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused;
1117 int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
1118 int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
1119 int ret = -1;
1120
1121 /* check if binding works */
1122 memset(&hooks, 0, sizeof(hooks));
1123 support.membind = &memsupport;
1124 hwloc_set_native_binding_hooks(&hooks, &support);
1125 if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) {
1126 get_cpubind = hooks.get_thisthread_cpubind;
1127 set_cpubind = hooks.set_thisthread_cpubind;
1128 } else if (hooks.get_thisproc_cpubind && hooks.set_thisproc_cpubind) {
1129 get_cpubind = hooks.get_thisproc_cpubind;
1130 set_cpubind = hooks.set_thisproc_cpubind;
1131 } else {
1132 /* we need binding support if there are multiple PUs */
1133 if (nbprocs > 1)
1134 goto out;
1135 get_cpubind = fake_get_cpubind;
1136 set_cpubind = fake_set_cpubind;
1137 }
1138
1139 if (!hwloc_have_x86_cpuid())
1140 goto out;
1141
1142 infos = calloc(nbprocs, sizeof(struct procinfo));
1143 if (NULL == infos)
1144 goto out;
1145 for (i = 0; i < nbprocs; i++) {
1146 infos[i].nodeid = (unsigned) -1;
1147 infos[i].packageid = (unsigned) -1;
1148 infos[i].dieid = (unsigned) -1;
1149 infos[i].unitid = (unsigned) -1;
1150 infos[i].coreid = (unsigned) -1;
1151 infos[i].threadid = (unsigned) -1;
1152 }
1153
1154 eax = 0x00;
1155 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1156 highest_cpuid = eax;
1157 if (ebx == INTEL_EBX && ecx == INTEL_ECX && edx == INTEL_EDX)
1158 cpuid_type = intel;
1159 if (ebx == AMD_EBX && ecx == AMD_ECX && edx == AMD_EDX)
1160 cpuid_type = amd;
1161 /* support for zhaoxin x86 cpu vendor id */
1162 if (ebx == ZX_EBX && ecx == ZX_ECX && edx == ZX_EDX)
1163 cpuid_type = zhaoxin;
1164 if (ebx == SH_EBX && ecx == SH_ECX && edx == SH_EDX)
1165 cpuid_type = zhaoxin;
1166 else if (ebx == HYGON_EBX && ecx == HYGON_ECX && edx == HYGON_EDX)
1167 cpuid_type = hygon;
1168
1169 hwloc_debug("highest cpuid %x, cpuid type %u\n", highest_cpuid, cpuid_type);
1170 if (highest_cpuid < 0x01) {
1171 goto out_with_infos;
1172 }
1173
1174 eax = 0x01;
1175 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1176 features[0] = edx;
1177 features[4] = ecx;
1178
1179 eax = 0x80000000;
1180 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1181 highest_ext_cpuid = eax;
1182
1183 hwloc_debug("highest extended cpuid %x\n", highest_ext_cpuid);
1184
1185 if (highest_cpuid >= 0x7) {
1186 eax = 0x7;
1187 ecx = 0;
1188 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1189 features[9] = ebx;
1190 }
1191
1192 if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) {
1193 eax = 0x80000001;
1194 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1195 features[1] = edx;
1196 features[6] = ecx;
1197 }
1198
1199 hwloc_x86_os_state_save(&os_state);
1200
1201 ret = look_procs(backend, infos, fulldiscovery,
1202 highest_cpuid, highest_ext_cpuid, features, cpuid_type,
1203 get_cpubind, set_cpubind);
1204 if (ret >= 0)
1205 /* success, we're done */
1206 goto out_with_os_state;
1207
1208 if (nbprocs == 1) {
1209 /* only one processor, no need to bind */
1210 look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type);
1211 ret = summarize(backend, infos, fulldiscovery);
1212 }
1213
1214 out_with_os_state:
1215 hwloc_x86_os_state_restore(&os_state);
1216
1217 out_with_infos:
1218 if (NULL != infos) {
1219 for (i = 0; i < nbprocs; i++) {
1220 free(infos[i].cache);
1221 if (infos[i].otherids)
1222 free(infos[i].otherids);
1223 }
1224 free(infos);
1225 }
1226
1227 out:
1228 return ret;
1229 }
1230
1231 static int
1232 hwloc_x86_discover(struct hwloc_backend *backend)
1233 {
1234 struct hwloc_x86_backend_data_s *data = backend->private_data;
1235 struct hwloc_topology *topology = backend->topology;
1236 int alreadypus = 0;
1237 int ret;
1238
1239 #if HAVE_DECL_RUNNING_ON_VALGRIND
1240 if (RUNNING_ON_VALGRIND) {
1241 fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n");
1242 return 0;
1243 }
1244 #endif
1245
1246 data->nbprocs = hwloc_fallback_nbprocessors(topology);
1247
1248 if (!topology->is_thissystem) {
1249 hwloc_debug("%s", "\nno x86 detection (not thissystem)\n");
1250 return 0;
1251 }
1252
1253 if (topology->levels[0][0]->cpuset) {
1254 /* somebody else discovered things */
1255 if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
1256 /* only PUs were discovered, as many as we would have found; complete the topology with everything else */
1257 alreadypus = 1;
1258 goto fulldiscovery;
1259 }
1260
1261 /* several object types were added, we can't easily complete, just do partial discovery */
1262 ret = hwloc_look_x86(backend, 0);
1263 if (ret)
1264 hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
1265 return ret;
1266 } else {
1267 /* topology is empty, initialize it */
1268 hwloc_alloc_obj_cpusets(topology->levels[0][0]);
1269 }
1270
1271 fulldiscovery:
1272 if (hwloc_look_x86(backend, 1) < 0) {
1273 /* if failed, create PUs */
1274 if (!alreadypus)
1275 hwloc_setup_pu_level(topology, data->nbprocs);
1276 }
1277
1278 hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
1279
1280 #ifdef HAVE_UNAME
1281 hwloc_add_uname_info(topology, NULL); /* we already know is_thissystem() is true */
1282 #else
1283 /* uname isn't available, manually set up the "Architecture" info */
1284 #ifdef HWLOC_X86_64_ARCH
1285 hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64");
1286 #else
1287 hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86");
1288 #endif
1289 #endif
1290 return 1;
1291 }
1292
1293 static void
1294 hwloc_x86_backend_disable(struct hwloc_backend *backend)
1295 {
1296 struct hwloc_x86_backend_data_s *data = backend->private_data;
1297 hwloc_bitmap_free(data->apicid_set);
1298 free(data);
1299 }
1300
1301 static struct hwloc_backend *
1302 hwloc_x86_component_instantiate(struct hwloc_disc_component *component,
1303 const void *_data1 __hwloc_attribute_unused,
1304 const void *_data2 __hwloc_attribute_unused,
1305 const void *_data3 __hwloc_attribute_unused)
1306 {
1307 struct hwloc_backend *backend;
1308 struct hwloc_x86_backend_data_s *data;
1309
1310 backend = hwloc_backend_alloc(component);
1311 if (!backend)
1312 goto out;
1313
1314 data = malloc(sizeof(*data));
1315 if (!data) {
1316 errno = ENOMEM;
1317 goto out_with_backend;
1318 }
1319
1320 backend->private_data = data;
1321 backend->flags = HWLOC_BACKEND_FLAG_NEED_LEVELS;
1322 backend->discover = hwloc_x86_discover;
1323 backend->disable = hwloc_x86_backend_disable;
1324
1325 /* default values */
1326 data->is_knl = 0;
1327 data->apicid_set = hwloc_bitmap_alloc();
1328 data->apicid_unique = 1;
1329
1330 return backend;
1331
1332 out_with_backend:
1333 free(backend);
1334 out:
1335 return NULL;
1336 }
1337
1338 static struct hwloc_disc_component hwloc_x86_disc_component = {
1339 HWLOC_DISC_COMPONENT_TYPE_CPU,
1340 "x86",
1341 HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
1342 hwloc_x86_component_instantiate,
1343 45, /* between native and no_os */
1344 NULL
1345 };
1346
1347 const struct hwloc_component hwloc_x86_component = {
1348 HWLOC_COMPONENT_ABI,
1349 NULL, NULL,
1350 HWLOC_COMPONENT_TYPE_DISC,
1351 0,
1352 &hwloc_x86_disc_component
1353 };
1354