1 /*
2 * Copyright © 2010-2017 Inria. All rights reserved.
3 * Copyright © 2010-2013 Université Bordeaux
4 * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
5 * See COPYING in top-level directory.
6 *
7 *
8 * This backend is only used when the operating system does not export
9 * the necessary hardware topology information to user-space applications.
10 * Currently, only the FreeBSD backend relies on this x86 backend.
11 *
12 * Other backends such as Linux have their own way to retrieve various
13 * pieces of hardware topology information from the operating system
14 * on various architectures, without having to use this x86-specific code.
15 */
16
17 #include <private/autogen/config.h>
18 #include <hwloc.h>
19 #include <private/private.h>
20 #include <private/debug.h>
21 #include <private/misc.h>
22
23 #include <private/cpuid-x86.h>
24
25 #ifdef HAVE_VALGRIND_VALGRIND_H
26 #include <valgrind/valgrind.h>
27 #endif
28
29 struct hwloc_x86_backend_data_s {
30 unsigned nbprocs;
31 hwloc_bitmap_t apicid_set;
32 int apicid_unique;
33 int is_knl;
34 };
35
36 #define has_topoext(features) ((features)[6] & (1 << 22))
37 #define has_x2apic(features) ((features)[4] & (1 << 21))
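/* The features[] array passed around below follows the Linux kernel's cpufeature
 * word layout (see hwloc_look_x86() where it is filled): words 0/4 come from CPUID
 * leaf 0x1 EDX/ECX (bit 21 of word 4 = x2APIC), words 1/6 from leaf 0x80000001
 * EDX/ECX (bit 22 of word 6 = AMD TOPOEXT), and word 9 from leaf 0x7 EBX. */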
38
39 struct cacheinfo {
40 unsigned type;
41 unsigned level;
42 unsigned nbthreads_sharing;
43 unsigned cacheid;
44
45 unsigned linesize;
46 unsigned linepart;
47 int inclusive;
48 int ways;
49 unsigned sets;
50 unsigned long size;
51 };
52
53 struct procinfo {
54 unsigned present;
55 unsigned apicid;
56 unsigned max_log_proc;
57 unsigned max_nbcores;
58 unsigned max_nbthreads;
59 unsigned packageid;
60 unsigned nodeid;
61 unsigned unitid;
62 unsigned logprocid;
63 unsigned threadid;
64 unsigned coreid;
65 unsigned *otherids;
66 unsigned levels;
67 unsigned numcaches;
68 struct cacheinfo *cache;
69 char cpuvendor[13];
70 char cpumodel[3*4*4+1];
71 unsigned cpustepping;
72 unsigned cpumodelnumber;
73 unsigned cpufamilynumber;
74 };
75
76 enum cpuid_type {
77 intel,
78 amd,
79 unknown
80 };
81
82 static void fill_amd_cache(struct procinfo *infos, unsigned level, int type, unsigned cpuid)
83 {
84 struct cacheinfo *cache, *tmpcaches;
85 unsigned cachenum;
86 unsigned long size = 0;
87
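/* Legacy AMD cache descriptors pack everything into a single register:
 * leaf 0x80000005 ECX/EDX (L1d/L1i) report the size in KB in bits 31:24,
 * leaf 0x80000006 ECX (L2) reports it in KB in bits 31:16, and
 * leaf 0x80000006 EDX (L3) reports it in 512KB units in bits 31:18, hence the <<19 below.
 * Illustration with a hypothetical register value: ECX=0x02008040 from leaf 0x80000006
 * would describe a 512KB, 16-way, 64B-line unified L2. */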
88 if (level == 1)
89 size = ((cpuid >> 24)) << 10;
90 else if (level == 2)
91 size = ((cpuid >> 16)) << 10;
92 else if (level == 3)
93 size = ((cpuid >> 18)) << 19;
94 if (!size)
95 return;
96
97 tmpcaches = realloc(infos->cache, (infos->numcaches+1)*sizeof(*infos->cache));
98 if (!tmpcaches)
99 /* failed to allocate, ignore that cache */
100 return;
101 infos->cache = tmpcaches;
102 cachenum = infos->numcaches++;
103
104 cache = &infos->cache[cachenum];
105
106 cache->type = type;
107 cache->level = level;
108 if (level <= 2)
109 cache->nbthreads_sharing = 1;
110 else
111 cache->nbthreads_sharing = infos->max_log_proc;
112 cache->linesize = cpuid & 0xff;
113 cache->linepart = 0;
114 cache->inclusive = 0; /* old AMD (K8-K10) is supposed to have exclusive caches */
115
116 if (level == 1) {
117 cache->ways = (cpuid >> 16) & 0xff;
118 if (cache->ways == 0xff)
119 /* Fully associative */
120 cache->ways = -1;
121 } else {
122 static const unsigned ways_tab[] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, -1 };
123 unsigned ways = (cpuid >> 12) & 0xf;
124 cache->ways = ways_tab[ways];
125 }
126 cache->size = size;
127 cache->sets = 0;
128
129 hwloc_debug("cache L%u t%u linesize %u ways %u size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
130 }
131
132 /* Fetch information from the processor itself thanks to cpuid and store it in
133 * infos for summarize() to analyze them globally */
134 static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type)
135 {
136 struct hwloc_x86_backend_data_s *data = backend->private_data;
137 unsigned eax, ebx, ecx = 0, edx;
138 unsigned cachenum;
139 struct cacheinfo *cache;
140 unsigned regs[4];
141 unsigned _model, _extendedmodel, _family, _extendedfamily;
142
143 infos->present = 1;
144
145 /* on return from this function, the following fields must be set in infos:
146 * packageid, nodeid, unitid, coreid, threadid, or -1
147 * apicid
148 * levels and levels slots in otherids[]
149 * numcaches and numcaches slots in caches[]
150 *
151 * max_log_proc, max_nbthreads, max_nbcores, logprocid
152 * are only used temporarily inside this function and its callees.
153 */
154
155 /* Get apicid, max_log_proc, packageid, logprocid from cpuid 0x01 */
156 eax = 0x01;
157 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
158 infos->apicid = ebx >> 24;
159 if (edx & (1 << 28))
160 infos->max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1);
161 else
162 infos->max_log_proc = 1;
163 hwloc_debug("APIC ID 0x%02x max_log_proc %u\n", infos->apicid, infos->max_log_proc);
164 infos->packageid = infos->apicid / infos->max_log_proc;
165 infos->logprocid = infos->apicid % infos->max_log_proc;
166 hwloc_debug("phys %u thread %u\n", infos->packageid, infos->logprocid);
167
168 /* Get cpu model/family/stepping numbers from same cpuid */
169 _model = (eax>>4) & 0xf;
170 _extendedmodel = (eax>>16) & 0xf;
171 _family = (eax>>8) & 0xf;
172 _extendedfamily = (eax>>20) & 0xff;
173 if ((cpuid_type == intel || cpuid_type == amd) && _family == 0xf) {
174 infos->cpufamilynumber = _family + _extendedfamily;
175 } else {
176 infos->cpufamilynumber = _family;
177 }
178 if ((cpuid_type == intel && (_family == 0x6 || _family == 0xf))
179 || (cpuid_type == amd && _family == 0xf)) {
180 infos->cpumodelnumber = _model + (_extendedmodel << 4);
181 } else {
182 infos->cpumodelnumber = _model;
183 }
184 infos->cpustepping = eax & 0xf;
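/* Worked example (hypothetical values): a leaf-0x01 EAX of 0x000306c3 on an Intel CPU
 * decodes to family 0x6, model 0x3c (0xc | 0x3 << 4) and stepping 0x3. */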
185
186 if (cpuid_type == intel && infos->cpufamilynumber == 0x6 &&
187 (infos->cpumodelnumber == 0x57 || infos->cpumodelnumber == 0x85))
188 data->is_knl = 1; /* KNM is the same as KNL */
189
190 /* Get cpu vendor string from cpuid 0x00 */
191 memset(regs, 0, sizeof(regs));
192 regs[0] = 0;
193 hwloc_x86_cpuid(&regs[0], &regs[1], &regs[3], &regs[2]);
194 memcpy(infos->cpuvendor, regs+1, 4*3);
195 /* infos was calloc'ed, already ends with \0 */
196
197 /* Get cpu model string from cpuid 0x80000002-4 */
198 if (highest_ext_cpuid >= 0x80000004) {
199 memset(regs, 0, sizeof(regs));
200 regs[0] = 0x80000002;
201 hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
202 memcpy(infos->cpumodel, regs, 4*4);
203 regs[0] = 0x80000003;
204 hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
205 memcpy(infos->cpumodel + 4*4, regs, 4*4);
206 regs[0] = 0x80000004;
207 hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
208 memcpy(infos->cpumodel + 4*4*2, regs, 4*4);
209 /* infos was calloc'ed, already ends with \0 */
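/* Each of leaves 0x80000002-0x80000004 returned 16 bytes of the brand string above,
 * filling the 48-byte cpumodel buffer exactly. */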
210 }
211
212 /* Get core/thread information from cpuid 0x80000008
213 * (not supported on Intel)
214 */
215 if (cpuid_type != intel && highest_ext_cpuid >= 0x80000008) {
216 unsigned coreidsize;
217 eax = 0x80000008;
218 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
219 coreidsize = (ecx >> 12) & 0xf;
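/* ECX[15:12] (ApicIdCoreIdSize) is the width of the core-id field inside the APIC ID;
 * ECX[7:0] (NC) is the number of cores minus one, used directly when that width is zero. */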
220 hwloc_debug("core ID size: %u\n", coreidsize);
221 if (!coreidsize) {
222 infos->max_nbcores = (ecx & 0xff) + 1;
223 } else
224 infos->max_nbcores = 1 << coreidsize;
225 hwloc_debug("Thus max # of cores: %u\n", infos->max_nbcores);
226 /* Still no multithreaded AMD */
227 infos->max_nbthreads = 1;
228 hwloc_debug("and max # of threads: %u\n", infos->max_nbthreads);
229 /* The legacy max_log_proc is deprecated; it can be smaller than max_nbcores,
230 * which is the maximum number of cores that the processor could theoretically support
231 * (see "Multiple Core Calculation" in the AMD CPUID specification).
232 * Recompute packageid/logprocid/threadid/coreid accordingly.
233 */
234 infos->packageid = infos->apicid / infos->max_nbcores;
235 infos->logprocid = infos->apicid % infos->max_nbcores;
236 infos->threadid = infos->logprocid % infos->max_nbthreads;
237 infos->coreid = infos->logprocid / infos->max_nbthreads;
238 hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
239 }
240
241 infos->numcaches = 0;
242 infos->cache = NULL;
243
244 /* Get apicid, nodeid, unitid from cpuid 0x8000001e
245 * and cache information from cpuid 0x8000001d
246 * (AMD topology extension)
247 */
248 if (cpuid_type != intel && has_topoext(features)) {
249 unsigned apic_id, node_id, nodes_per_proc;
250
251 eax = 0x8000001e;
252 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
253 infos->apicid = apic_id = eax;
254
255 if (infos->cpufamilynumber == 0x16) {
256 /* ecx is reserved */
257 node_id = 0;
258 nodes_per_proc = 1;
259 } else {
260 node_id = ecx & 0xff;
261 nodes_per_proc = ((ecx >> 8) & 7) + 1;
262 }
263 infos->nodeid = node_id;
264 if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
265 || (infos->cpufamilynumber == 0x17 && nodes_per_proc > 4)) {
266 hwloc_debug("warning: undefined nodes_per_proc value %d, assuming it means %d\n", nodes_per_proc, nodes_per_proc);
267 }
268
269 if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */
270 unsigned unit_id, cores_per_unit;
271 infos->unitid = unit_id = ebx & 0xff;
272 cores_per_unit = ((ebx >> 8) & 0xff) + 1;
273 hwloc_debug("topoext %08x, %d nodes, node %d, %d cores in unit %d\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id);
274 } else {
275 unsigned core_id, threads_per_core;
276 infos->coreid = core_id = ebx & 0xff;
277 threads_per_core = ((ebx >> 8) & 0xff) + 1;
278 hwloc_debug("topoext %08x, %d nodes, node %d, %d threads in core %d\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id);
279 }
280
281 for (cachenum = 0; ; cachenum++) {
282 unsigned type;
283 eax = 0x8000001d;
284 ecx = cachenum;
285 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
286 type = eax & 0x1f;
287 if (type == 0)
288 break;
289 infos->numcaches++;
290 }
291
292 cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
293
294 for (cachenum = 0; ; cachenum++) {
295 unsigned long linesize, linepart, ways, sets;
296 unsigned type;
297 eax = 0x8000001d;
298 ecx = cachenum;
299 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
300
301 type = eax & 0x1f;
302
303 if (type == 0)
304 break;
305
306 cache->type = type;
307 cache->level = (eax >> 5) & 0x7;
308 /* Note: actually number of cores */
309 cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
310
311 cache->linesize = linesize = (ebx & 0xfff) + 1;
312 cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
313 ways = ((ebx >> 22) & 0x3ff) + 1;
314
315 if (eax & (1 << 9))
316 /* Fully associative */
317 cache->ways = -1;
318 else
319 cache->ways = ways;
320 cache->sets = sets = ecx + 1;
321 cache->size = linesize * linepart * ways * sets;
322 cache->inclusive = edx & 0x2;
323
324 hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
325
326 cache++;
327 }
328 } else {
329 /* If there's no topoext,
330 * get cache information from cpuid 0x80000005 and 0x80000006
331 * (not supported on Intel)
332 */
333 if (cpuid_type != intel && highest_ext_cpuid >= 0x80000005) {
334 eax = 0x80000005;
335 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
336 fill_amd_cache(infos, 1, 1, ecx); /* L1d */
337 fill_amd_cache(infos, 1, 2, edx); /* L1i */
338 }
339 if (cpuid_type != intel && highest_ext_cpuid >= 0x80000006) {
340 eax = 0x80000006;
341 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
342 if (ecx & 0xf000)
343 /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11.
344 * Could be useful if some Intels (at least before Core micro-architecture)
345 * support this leaf without leaf 0x4.
346 */
347 fill_amd_cache(infos, 2, 3, ecx); /* L2u */
348 if (edx & 0xf000)
349 fill_amd_cache(infos, 3, 3, edx); /* L3u */
350 }
351 }
352
353 /* Get thread/core + cache information from cpuid 0x04
354 * (not supported on AMD)
355 */
356 if (cpuid_type != amd && highest_cpuid >= 0x04) {
357 unsigned level;
358 for (cachenum = 0; ; cachenum++) {
359 unsigned type;
360 eax = 0x04;
361 ecx = cachenum;
362 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
363
364 type = eax & 0x1f;
365
366 hwloc_debug("cache %u type %u\n", cachenum, type);
367
368 if (type == 0)
369 break;
370 level = (eax >> 5) & 0x7;
371 if (data->is_knl && level == 3)
372 /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
373 break;
374 infos->numcaches++;
375
376 if (!cachenum) {
377 /* by the way, get thread/core information from the first cache */
378 infos->max_nbcores = ((eax >> 26) & 0x3f) + 1;
379 infos->max_nbthreads = infos->max_log_proc / infos->max_nbcores;
380 hwloc_debug("thus %u threads\n", infos->max_nbthreads);
381 infos->threadid = infos->logprocid % infos->max_nbthreads;
382 infos->coreid = infos->logprocid / infos->max_nbthreads;
383 hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
384 }
385 }
386
387 cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
388
389 for (cachenum = 0; ; cachenum++) {
390 unsigned long linesize, linepart, ways, sets;
391 unsigned type;
392 eax = 0x04;
393 ecx = cachenum;
394 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
395
396 type = eax & 0x1f;
397
398 if (type == 0)
399 break;
400 level = (eax >> 5) & 0x7;
401 if (data->is_knl && level == 3)
402 /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
403 break;
404
405 cache->type = type;
406 cache->level = level;
407 cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
408
409 cache->linesize = linesize = (ebx & 0xfff) + 1;
410 cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
411 ways = ((ebx >> 22) & 0x3ff) + 1;
412 if (eax & (1 << 9))
413 /* Fully associative */
414 cache->ways = -1;
415 else
416 cache->ways = ways;
417 cache->sets = sets = ecx + 1;
418 cache->size = linesize * linepart * ways * sets;
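/* Illustration (hypothetical values): 64-byte lines, 1 partition, 8 ways and 64 sets
 * yield a 32KB cache. */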
419 cache->inclusive = edx & 0x2;
420
421 hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
422
423 cache++;
424 }
425 }
426
427 /* Get package/core/thread information from cpuid 0x0b
428 * (Intel x2APIC)
429 */
430 if (cpuid_type == intel && highest_cpuid >= 0x0b && has_x2apic(features)) {
431 unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
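/* Leaf 0x0b enumerates topology levels: EAX[4:0] is the number of APIC-ID bits to shift
 * right to reach the next level, ECX[15:8] is the level type (1 = SMT, 2 = core), and EDX
 * is the full x2APIC ID, so successive shifts slice the APIC ID into thread/core/package fields.
 * Illustration (hypothetical): APIC ID 0x1d with shifts 1 then 4 gives threadid 1,
 * coreid 6 and packageid 1. */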
432 for (level = 0; ; level++) {
433 ecx = level;
434 eax = 0x0b;
435 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
436 if (!eax && !ebx)
437 break;
438 }
439 if (level) {
440 infos->levels = level;
441 infos->otherids = malloc(level * sizeof(*infos->otherids));
442 for (level = 0; ; level++) {
443 ecx = level;
444 eax = 0x0b;
445 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
446 if (!eax && !ebx)
447 break;
448 apic_nextshift = eax & 0x1f;
449 apic_number = ebx & 0xffff;
450 apic_type = (ecx & 0xff00) >> 8;
451 apic_id = edx;
452 id = (apic_id >> apic_shift) & ((1 << (apic_nextshift - apic_shift)) - 1);
453 hwloc_debug("x2APIC %08x %d: nextshift %d num %2d type %d id %2d\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
454 infos->apicid = apic_id;
455 infos->otherids[level] = UINT_MAX;
456 switch (apic_type) {
457 case 1:
458 infos->threadid = id;
459 break;
460 case 2:
461 infos->coreid = id;
462 break;
463 default:
464 hwloc_debug("x2APIC %d: unknown type %d\n", level, apic_type);
465 infos->otherids[level] = apic_id >> apic_shift;
466 break;
467 }
468 apic_shift = apic_nextshift;
469 }
470 infos->apicid = apic_id;
471 infos->packageid = apic_id >> apic_shift;
472 hwloc_debug("x2APIC remainder: %d\n", infos->packageid);
473 hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
474 }
475 }
476
477 /* Now that we have all info, compute cacheids and apply quirks */
478 for (cachenum = 0; cachenum < infos->numcaches; cachenum++) {
479 cache = &infos->cache[cachenum];
480
481 /* default cacheid value */
482 cache->cacheid = infos->apicid / cache->nbthreads_sharing;
483
484 /* AMD quirk */
485 if (cpuid_type == amd
486 && infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9
487 && cache->level == 3
488 && (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) {
489 /* Fix AMD family 0x10 model 0x9 (Magny-Cours) with 8 or 12 cores.
490 * The L3 (and its associativity) is actually split into two halves.
491 */
492 if (cache->nbthreads_sharing == 16)
493 cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */
494 cache->nbthreads_sharing /= 2;
495 cache->size /= 2;
496 if (cache->ways != -1)
497 cache->ways /= 2;
498 /* AMD Magny-Cours 12-core processors reserve APIC ids as AAAAAABBBBBB....
499 * among the first L3 (A), the second L3 (B), and non-existent cores (.).
500 * On multi-socket servers, L3s in non-first sockets may have APIC id ranges
501 * such as [16-21] that are not aligned on a multiple of nbthreads_sharing (6).
502 * That means we can't just compare apicid/nbthreads_sharing to identify siblings.
503 */
504 cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
505 + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */
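/* Illustration (hypothetical values): with max_log_proc 16 and nbthreads_sharing reduced
 * to 6, APIC IDs 0-5 map to cacheid 0, 6-11 to cacheid 1, and 16-21 (second package)
 * to cacheid 2. */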
506
507 } else if (cpuid_type == amd
508 && infos->cpufamilynumber == 0x15
509 && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
510 && cache->level == 3 && cache->nbthreads_sharing == 6) {
511 /* AMD Bulldozer and Piledriver 12-core processors have the same APIC ids as Magny-Cours above,
512 * but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here.
513 */
514 cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
515 + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */
516 }
517 }
518
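/* Remember this APIC ID; if it was already seen, the CPUID data is inconsistent,
 * apicid_unique is cleared, and look_procs() will then skip full discovery. */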
519 if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
520 data->apicid_unique = 0;
521 else
522 hwloc_bitmap_set(data->apicid_set, infos->apicid);
523 }
524
525 static void
526 hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int nodup)
527 {
528 char number[8];
529 hwloc_obj_add_info_nodup(obj, "CPUVendor", info->cpuvendor, nodup);
530 snprintf(number, sizeof(number), "%u", info->cpufamilynumber);
531 hwloc_obj_add_info_nodup(obj, "CPUFamilyNumber", number, nodup);
532 snprintf(number, sizeof(number), "%u", info->cpumodelnumber);
533 hwloc_obj_add_info_nodup(obj, "CPUModelNumber", number, nodup);
534 if (info->cpumodel[0]) {
535 const char *c = info->cpumodel;
536 while (*c == ' ')
537 c++;
538 hwloc_obj_add_info_nodup(obj, "CPUModel", c, nodup);
539 }
540 snprintf(number, sizeof(number), "%u", info->cpustepping);
541 hwloc_obj_add_info_nodup(obj, "CPUStepping", number, nodup);
542 }
543
544 /* Analyse information stored in infos, and build/annotate topology levels accordingly */
545 static int summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery)
546 {
547 struct hwloc_topology *topology = backend->topology;
548 struct hwloc_x86_backend_data_s *data = backend->private_data;
549 unsigned nbprocs = data->nbprocs;
550 hwloc_bitmap_t complete_cpuset = hwloc_bitmap_alloc();
551 unsigned i, j, l, level, type;
552 unsigned nbpackages = 0;
553 int one = -1;
554 unsigned next_group_depth = topology->next_group_depth;
555 int caches_added = 0;
556 hwloc_bitmap_t remaining_cpuset;
557
558 for (i = 0; i < nbprocs; i++)
559 if (infos[i].present) {
560 hwloc_bitmap_set(complete_cpuset, i);
561 one = i;
562 }
563
564 if (one == -1) {
565 hwloc_bitmap_free(complete_cpuset);
566 return 0;
567 }
568
569 remaining_cpuset = hwloc_bitmap_alloc();
570
571 /* Ideally, when fulldiscovery=0, we could add any object that doesn't exist yet.
572 * But what if the x86 and the native backends disagree because one is buggy? Which one to trust?
573 * Only annotate existing objects for now.
574 */
575
576 /* Look for packages */
577 if (fulldiscovery) {
578 hwloc_bitmap_t package_cpuset;
579 hwloc_obj_t package;
580
581 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
582 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
583 unsigned packageid = infos[i].packageid;
584
585 package_cpuset = hwloc_bitmap_alloc();
586 for (j = i; j < nbprocs; j++) {
587 if (infos[j].packageid == packageid) {
588 hwloc_bitmap_set(package_cpuset, j);
589 hwloc_bitmap_clr(remaining_cpuset, j);
590 }
591 }
592 package = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, packageid);
593 package->cpuset = package_cpuset;
594
595 hwloc_x86_add_cpuinfos(package, &infos[i], 0);
596
597 hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n",
598 packageid, package_cpuset);
599 hwloc_insert_object_by_cpuset(topology, package);
600 nbpackages++;
601 }
602
603 } else {
604 /* Annotate previously-existing packages */
605 hwloc_obj_t package = NULL;
606 int same = 1;
607 nbpackages = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
608 /* check whether all packages have the same info */
609 for(i=1; i<nbprocs; i++) {
610 if (strcmp(infos[i].cpumodel, infos[0].cpumodel)) {
611 same = 0;
612 break;
613 }
614 }
615 /* now iterate over packages and annotate them */
616 while ((package = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PACKAGE, package)) != NULL) {
617 if (package->os_index == (unsigned) -1) {
618 /* try to fix the package OS index if unknown.
619 * FIXME: ideally, we should check all bits in case x86 and the native backend disagree.
620 */
621 for(i=0; i<nbprocs; i++) {
622 if (hwloc_bitmap_isset(package->cpuset, i)) {
623 package->os_index = infos[i].packageid;
624 break;
625 }
626 }
627 }
628 for(i=0; i<nbprocs; i++) {
629 /* if there's a single package, it's the one we want.
630 * if the index is ok, it's the one we want.
631 * if the index is unknown but all packages have the same id, that's fine
632 */
633 if (nbpackages == 1 || infos[i].packageid == package->os_index || (same && package->os_index == (unsigned) -1)) {
634 hwloc_x86_add_cpuinfos(package, &infos[i], 1);
635 break;
636 }
637 }
638 }
639 }
640 /* If there was no package, annotate the Machine instead */
641 if ((!nbpackages) && infos[0].cpumodel[0]) {
642 hwloc_x86_add_cpuinfos(hwloc_get_root_obj(topology), &infos[0], 1);
643 }
644
645 /* Look for NUMA nodes inside packages */
646 if (fulldiscovery) {
647 hwloc_bitmap_t node_cpuset;
648 hwloc_obj_t node;
649
650 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
651 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
652 unsigned packageid = infos[i].packageid;
653 unsigned nodeid = infos[i].nodeid;
654
655 if (nodeid == (unsigned)-1) {
656 hwloc_bitmap_clr(remaining_cpuset, i);
657 continue;
658 }
659
660 node_cpuset = hwloc_bitmap_alloc();
661 for (j = i; j < nbprocs; j++) {
662 if (infos[j].nodeid == (unsigned) -1) {
663 hwloc_bitmap_clr(remaining_cpuset, j);
664 continue;
665 }
666
667 if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) {
668 hwloc_bitmap_set(node_cpuset, j);
669 hwloc_bitmap_clr(remaining_cpuset, j);
670 }
671 }
672 node = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, nodeid);
673 node->cpuset = node_cpuset;
674 node->nodeset = hwloc_bitmap_alloc();
675 hwloc_bitmap_set(node->nodeset, nodeid);
676 hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
677 nodeid, node_cpuset);
678 hwloc_insert_object_by_cpuset(topology, node);
679 }
680 }
681
682 /* Look for Compute units inside packages */
683 if (fulldiscovery) {
684 hwloc_bitmap_t unit_cpuset;
685 hwloc_obj_t unit;
686
687 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
688 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
689 unsigned packageid = infos[i].packageid;
690 unsigned unitid = infos[i].unitid;
691
692 if (unitid == (unsigned)-1) {
693 hwloc_bitmap_clr(remaining_cpuset, i);
694 continue;
695 }
696
697 unit_cpuset = hwloc_bitmap_alloc();
698 for (j = i; j < nbprocs; j++) {
699 if (infos[j].unitid == (unsigned) -1) {
700 hwloc_bitmap_clr(remaining_cpuset, j);
701 continue;
702 }
703
704 if (infos[j].packageid == packageid && infos[j].unitid == unitid) {
705 hwloc_bitmap_set(unit_cpuset, j);
706 hwloc_bitmap_clr(remaining_cpuset, j);
707 }
708 }
709 unit = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unitid);
710 unit->cpuset = unit_cpuset;
711 hwloc_obj_add_info(unit, "Type", "ComputeUnit");
712 hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n",
713 unitid, unit_cpuset);
714 hwloc_insert_object_by_cpuset(topology, unit);
715 }
716 }
717
718 /* Look for unknown objects */
719 if (infos[one].otherids) {
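/* level is unsigned, so the downward loop below relies on wrap-around:
 * after handling level 0, level becomes UINT_MAX and the condition fails. */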
720 for (level = infos[one].levels-1; level <= infos[one].levels-1; level--) {
721 if (infos[one].otherids[level] != UINT_MAX) {
722 hwloc_bitmap_t unknown_cpuset;
723 hwloc_obj_t unknown_obj;
724
725 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
726 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
727 unsigned unknownid = infos[i].otherids[level];
728
729 unknown_cpuset = hwloc_bitmap_alloc();
730 for (j = i; j < nbprocs; j++) {
731 if (infos[j].otherids[level] == unknownid) {
732 hwloc_bitmap_set(unknown_cpuset, j);
733 hwloc_bitmap_clr(remaining_cpuset, j);
734 }
735 }
736 unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unknownid);
737 unknown_obj->cpuset = unknown_cpuset;
738 unknown_obj->os_level = level;
739 unknown_obj->attr->group.depth = topology->next_group_depth + level;
740 if (next_group_depth <= topology->next_group_depth + level)
741 next_group_depth = topology->next_group_depth + level + 1;
742 hwloc_debug_2args_bitmap("os unknown%d %u has cpuset %s\n",
743 level, unknownid, unknown_cpuset);
744 hwloc_insert_object_by_cpuset(topology, unknown_obj);
745 }
746 }
747 }
748 }
749
750 /* Look for cores */
751 if (fulldiscovery) {
752 hwloc_bitmap_t core_cpuset;
753 hwloc_obj_t core;
754
755 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
756 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
757 unsigned packageid = infos[i].packageid;
758 unsigned nodeid = infos[i].nodeid;
759 unsigned coreid = infos[i].coreid;
760
761 if (coreid == (unsigned) -1) {
762 hwloc_bitmap_clr(remaining_cpuset, i);
763 continue;
764 }
765
766 core_cpuset = hwloc_bitmap_alloc();
767 for (j = i; j < nbprocs; j++) {
768 if (infos[j].coreid == (unsigned) -1) {
769 hwloc_bitmap_clr(remaining_cpuset, j);
770 continue;
771 }
772
773 if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) {
774 hwloc_bitmap_set(core_cpuset, j);
775 hwloc_bitmap_clr(remaining_cpuset, j);
776 }
777 }
778 core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, coreid);
779 core->cpuset = core_cpuset;
780 hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
781 coreid, core_cpuset);
782 hwloc_insert_object_by_cpuset(topology, core);
783 }
784 }
785
786 /* Look for PUs */
787 if (fulldiscovery) {
788 hwloc_debug("%s", "\n\n * CPU cpusets *\n\n");
789 for (i=0; i<nbprocs; i++)
790 if(infos[i].present) { /* Only add present PU. We don't know if others actually exist */
791 struct hwloc_obj *obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, i);
792 obj->cpuset = hwloc_bitmap_alloc();
793 hwloc_bitmap_only(obj->cpuset, i);
794 hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset);
795 hwloc_insert_object_by_cpuset(topology, obj);
796 }
797 }
798
799 /* Look for caches */
800 /* First find max level */
801 level = 0;
802 for (i = 0; i < nbprocs; i++)
803 for (j = 0; j < infos[i].numcaches; j++)
804 if (infos[i].cache[j].level > level)
805 level = infos[i].cache[j].level;
806 while (level > 0) {
807 for (type = 1; type <= 3; type++) {
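/* type follows the CPUID cache-type encoding used above: 1 = data, 2 = instruction, 3 = unified. */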
808 /* Look for caches of that type at level level */
809 {
810 hwloc_obj_t cache;
811
812 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
813 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
814 hwloc_bitmap_t puset;
815 int depth;
816
817 for (l = 0; l < infos[i].numcaches; l++) {
818 if (infos[i].cache[l].level == level && infos[i].cache[l].type == type)
819 break;
820 }
821 if (l == infos[i].numcaches) {
822 /* no cache of that type at this level in proc i */
823 hwloc_bitmap_clr(remaining_cpuset, i);
824 continue;
825 }
826
827 puset = hwloc_bitmap_alloc();
828 hwloc_bitmap_set(puset, i);
829 depth = hwloc_get_cache_type_depth(topology, level,
830 type == 1 ? HWLOC_OBJ_CACHE_DATA : type == 2 ? HWLOC_OBJ_CACHE_INSTRUCTION : HWLOC_OBJ_CACHE_UNIFIED);
831 if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
832 cache = hwloc_get_next_obj_covering_cpuset_by_depth(topology, puset, depth, NULL);
833 else
834 cache = NULL;
835 hwloc_bitmap_free(puset);
836
837 if (cache) {
838 /* Found cache above that PU, annotate if no such attribute yet */
839 if (!hwloc_obj_get_info_by_name(cache, "Inclusive"))
840 hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
841 hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, cache->cpuset);
842 } else {
843 /* Add the missing cache */
844 hwloc_bitmap_t cache_cpuset;
845 unsigned packageid = infos[i].packageid;
846 unsigned cacheid = infos[i].cache[l].cacheid;
847 /* Now look for others sharing it */
848 cache_cpuset = hwloc_bitmap_alloc();
849 for (j = i; j < nbprocs; j++) {
850 unsigned l2;
851 for (l2 = 0; l2 < infos[j].numcaches; l2++) {
852 if (infos[j].cache[l2].level == level && infos[j].cache[l2].type == type)
853 break;
854 }
855 if (l2 == infos[j].numcaches) {
856 /* no cache of that type at this level in proc j */
857 hwloc_bitmap_clr(remaining_cpuset, j);
858 continue;
859 }
860 if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) {
861 hwloc_bitmap_set(cache_cpuset, j);
862 hwloc_bitmap_clr(remaining_cpuset, j);
863 }
864 }
865 cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, cacheid);
866 cache->attr->cache.depth = level;
867 cache->attr->cache.size = infos[i].cache[l].size;
868 cache->attr->cache.linesize = infos[i].cache[l].linesize;
869 cache->attr->cache.associativity = infos[i].cache[l].ways;
870 switch (infos[i].cache[l].type) {
871 case 1:
872 cache->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
873 break;
874 case 2:
875 cache->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
876 break;
877 case 3:
878 cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
879 break;
880 }
881 cache->cpuset = cache_cpuset;
882 hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
883 hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n",
884 level, cacheid, cache_cpuset);
885 hwloc_insert_object_by_cpuset(topology, cache);
886 caches_added++;
887 }
888 }
889 }
890 }
891 level--;
892 }
893
894 hwloc_bitmap_free(remaining_cpuset);
895 hwloc_bitmap_free(complete_cpuset);
896 topology->next_group_depth = next_group_depth;
897
898 return fulldiscovery || caches_added;
899 }
900
901 static int
902 look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery,
903 unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type,
904 int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags),
905 int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags))
906 {
907 struct hwloc_x86_backend_data_s *data = backend->private_data;
908 struct hwloc_topology *topology = backend->topology;
909 unsigned nbprocs = data->nbprocs;
910 hwloc_bitmap_t orig_cpuset = hwloc_bitmap_alloc();
911 hwloc_bitmap_t set;
912 unsigned i;
913 int ret = 0;
914
915 if (get_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) {
916 hwloc_bitmap_free(orig_cpuset);
917 return -1;
918 }
919
920 set = hwloc_bitmap_alloc();
921
922 for (i = 0; i < nbprocs; i++) {
923 hwloc_bitmap_only(set, i);
924 hwloc_debug("binding to CPU%d\n", i);
925 if (set_cpubind(topology, set, HWLOC_CPUBIND_STRICT)) {
926 hwloc_debug("could not bind to CPU%d: %s\n", i, strerror(errno));
927 continue;
928 }
929 look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type);
930 }
931
932 set_cpubind(topology, orig_cpuset, 0);
933 hwloc_bitmap_free(set);
934 hwloc_bitmap_free(orig_cpuset);
935
936 if (!data->apicid_unique)
937 fulldiscovery = 0;
938 else
939 ret = summarize(backend, infos, fulldiscovery);
940 return ret;
941 }
942
943 #if defined HWLOC_FREEBSD_SYS && defined HAVE_CPUSET_SETID
944 #include <sys/param.h>
945 #include <sys/cpuset.h>
946 typedef cpusetid_t hwloc_x86_os_state_t;
947 static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state)
948 {
949 /* temporarily make all cpus available during discovery */
950 cpuset_getid(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, state);
951 cpuset_setid(CPU_WHICH_PID, -1, 0);
952 }
953 static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state)
954 {
955 /* restore initial cpuset */
956 cpuset_setid(CPU_WHICH_PID, -1, *state);
957 }
958 #else /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
959 typedef void * hwloc_x86_os_state_t;
960 static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state __hwloc_attribute_unused) { }
961 static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state __hwloc_attribute_unused) { }
962 #endif /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
963
964
965 #define INTEL_EBX ('G' | ('e'<<8) | ('n'<<16) | ('u'<<24))
966 #define INTEL_EDX ('i' | ('n'<<8) | ('e'<<16) | ('I'<<24))
967 #define INTEL_ECX ('n' | ('t'<<8) | ('e'<<16) | ('l'<<24))
968
969 #define AMD_EBX ('A' | ('u'<<8) | ('t'<<16) | ('h'<<24))
970 #define AMD_EDX ('e' | ('n'<<8) | ('t'<<16) | ('i'<<24))
971 #define AMD_ECX ('c' | ('A'<<8) | ('M'<<16) | ('D'<<24))
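/* The EBX/EDX/ECX constants above spell out the CPUID vendor strings
 * "GenuineIntel" and "AuthenticAMD". */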
972
973 /* fake cpubind for when nbprocs=1 and no binding support */
974 static int fake_get_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
975 hwloc_cpuset_t set __hwloc_attribute_unused,
976 int flags __hwloc_attribute_unused)
977 {
978 return 0;
979 }
980 static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
981 hwloc_const_cpuset_t set __hwloc_attribute_unused,
982 int flags __hwloc_attribute_unused)
983 {
984 return 0;
985 }
986
987 static
988 int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
989 {
990 struct hwloc_x86_backend_data_s *data = backend->private_data;
991 unsigned nbprocs = data->nbprocs;
992 unsigned eax, ebx, ecx = 0, edx;
993 unsigned i;
994 unsigned highest_cpuid;
995 unsigned highest_ext_cpuid;
996 /* This stores cpuid features with the same indexing as Linux */
997 unsigned features[10] = { 0 };
998 struct procinfo *infos = NULL;
999 enum cpuid_type cpuid_type = unknown;
1000 hwloc_x86_os_state_t os_state;
1001 struct hwloc_binding_hooks hooks;
1002 struct hwloc_topology_support support;
1003 struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused;
1004 int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
1005 int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
1006 int ret = -1;
1007
1008 /* check if binding works */
1009 memset(&hooks, 0, sizeof(hooks));
1010 support.membind = &memsupport;
1011 hwloc_set_native_binding_hooks(&hooks, &support);
1012 if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) {
1013 get_cpubind = hooks.get_thisthread_cpubind;
1014 set_cpubind = hooks.set_thisthread_cpubind;
1015 } else if (hooks.get_thisproc_cpubind && hooks.set_thisproc_cpubind) {
1016 get_cpubind = hooks.get_thisproc_cpubind;
1017 set_cpubind = hooks.set_thisproc_cpubind;
1018 } else {
1019 /* we need binding support if there are multiple PUs */
1020 if (nbprocs > 1)
1021 goto out;
1022 get_cpubind = fake_get_cpubind;
1023 set_cpubind = fake_set_cpubind;
1024 }
1025
1026 if (!hwloc_have_x86_cpuid())
1027 goto out;
1028
1029 infos = calloc(nbprocs, sizeof(struct procinfo));
1030 if (NULL == infos)
1031 goto out;
1032 for (i = 0; i < nbprocs; i++) {
1033 infos[i].nodeid = (unsigned) -1;
1034 infos[i].packageid = (unsigned) -1;
1035 infos[i].unitid = (unsigned) -1;
1036 infos[i].coreid = (unsigned) -1;
1037 infos[i].threadid = (unsigned) -1;
1038 }
1039
1040 eax = 0x00;
1041 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1042 highest_cpuid = eax;
1043 if (ebx == INTEL_EBX && ecx == INTEL_ECX && edx == INTEL_EDX)
1044 cpuid_type = intel;
1045 if (ebx == AMD_EBX && ecx == AMD_ECX && edx == AMD_EDX)
1046 cpuid_type = amd;
1047
1048 hwloc_debug("highest cpuid %x, cpuid type %u\n", highest_cpuid, cpuid_type);
1049 if (highest_cpuid < 0x01) {
1050 goto out_with_infos;
1051 }
1052
1053 eax = 0x01;
1054 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1055 features[0] = edx;
1056 features[4] = ecx;
1057
1058 eax = 0x80000000;
1059 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1060 highest_ext_cpuid = eax;
1061
1062 hwloc_debug("highest extended cpuid %x\n", highest_ext_cpuid);
1063
1064 if (highest_cpuid >= 0x7) {
1065 eax = 0x7;
1066 ecx = 0;
1067 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1068 features[9] = ebx;
1069 }
1070
1071 if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) {
1072 eax = 0x80000001;
1073 hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
1074 features[1] = edx;
1075 features[6] = ecx;
1076 }
1077
1078 hwloc_x86_os_state_save(&os_state);
1079
1080 ret = look_procs(backend, infos, fulldiscovery,
1081 highest_cpuid, highest_ext_cpuid, features, cpuid_type,
1082 get_cpubind, set_cpubind);
1083 if (ret >= 0)
1084 /* success, we're done */
1085 goto out_with_os_state;
1086
1087 if (nbprocs == 1) {
1088 /* only one processor, no need to bind */
1089 look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type);
1090 ret = summarize(backend, infos, fulldiscovery);
1091 }
1092
1093 out_with_os_state:
1094 hwloc_x86_os_state_restore(&os_state);
1095
1096 out_with_infos:
1097 if (NULL != infos) {
1098 for (i = 0; i < nbprocs; i++) {
1099 free(infos[i].cache);
1100 if (infos[i].otherids)
1101 free(infos[i].otherids);
1102 }
1103 free(infos);
1104 }
1105
1106 out:
1107 return ret;
1108 }
1109
1110 static int
1111 hwloc_x86_discover(struct hwloc_backend *backend)
1112 {
1113 struct hwloc_x86_backend_data_s *data = backend->private_data;
1114 struct hwloc_topology *topology = backend->topology;
1115 int alreadypus = 0;
1116 int ret;
1117
1118 #if HAVE_DECL_RUNNING_ON_VALGRIND
1119 if (RUNNING_ON_VALGRIND) {
1120 fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n");
1121 return 0;
1122 }
1123 #endif
1124
1125 data->nbprocs = hwloc_fallback_nbprocessors(topology);
1126
1127 if (!topology->is_thissystem) {
1128 hwloc_debug("%s", "\nno x86 detection (not thissystem)\n");
1129 return 0;
1130 }
1131
1132 if (topology->levels[0][0]->cpuset) {
1133 /* somebody else discovered things */
1134 if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
1135 /* only PUs were discovered, as many as we would have found; complete the topology with everything else */
1136 alreadypus = 1;
1137 goto fulldiscovery;
1138 }
1139
1140 /* several object types were added, we can't easily complete, just do partial discovery */
1141 ret = hwloc_look_x86(backend, 0);
1142 if (ret)
1143 hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
1144 return ret;
1145 } else {
1146 /* topology is empty, initialize it */
1147 hwloc_alloc_obj_cpusets(topology->levels[0][0]);
1148 }
1149
1150 fulldiscovery:
1151 if (hwloc_look_x86(backend, 1) < 0) {
1152 /* if failed, create PUs */
1153 if (!alreadypus)
1154 hwloc_setup_pu_level(topology, data->nbprocs);
1155 }
1156
1157 hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
1158
1159 #ifdef HAVE_UNAME
1160 hwloc_add_uname_info(topology, NULL); /* we already know is_thissystem() is true */
1161 #else
1162 /* uname isn't available, manually setup the "Architecture" info */
1163 #ifdef HWLOC_X86_64_ARCH
1164 hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64");
1165 #else
1166 hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86");
1167 #endif
1168 #endif
1169 return 1;
1170 }
1171
1172 static void
1173 hwloc_x86_backend_disable(struct hwloc_backend *backend)
1174 {
1175 struct hwloc_x86_backend_data_s *data = backend->private_data;
1176 hwloc_bitmap_free(data->apicid_set);
1177 free(data);
1178 }
1179
1180 static struct hwloc_backend *
1181 hwloc_x86_component_instantiate(struct hwloc_disc_component *component,
1182 const void *_data1 __hwloc_attribute_unused,
1183 const void *_data2 __hwloc_attribute_unused,
1184 const void *_data3 __hwloc_attribute_unused)
1185 {
1186 struct hwloc_backend *backend;
1187 struct hwloc_x86_backend_data_s *data;
1188
1189 backend = hwloc_backend_alloc(component);
1190 if (!backend)
1191 goto out;
1192
1193 data = malloc(sizeof(*data));
1194 if (!data) {
1195 errno = ENOMEM;
1196 goto out_with_backend;
1197 }
1198
1199 backend->private_data = data;
1200 backend->flags = HWLOC_BACKEND_FLAG_NEED_LEVELS;
1201 backend->discover = hwloc_x86_discover;
1202 backend->disable = hwloc_x86_backend_disable;
1203
1204 /* default values */
1205 data->is_knl = 0;
1206 data->apicid_set = hwloc_bitmap_alloc();
1207 data->apicid_unique = 1;
1208
1209 return backend;
1210
1211 out_with_backend:
1212 free(backend);
1213 out:
1214 return NULL;
1215 }
1216
1217 static struct hwloc_disc_component hwloc_x86_disc_component = {
1218 HWLOC_DISC_COMPONENT_TYPE_CPU,
1219 "x86",
1220 HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
1221 hwloc_x86_component_instantiate,
1222 45, /* between native and no_os */
1223 NULL
1224 };
1225
1226 const struct hwloc_component hwloc_x86_component = {
1227 HWLOC_COMPONENT_ABI,
1228 NULL, NULL,
1229 HWLOC_COMPONENT_TYPE_DISC,
1230 0,
1231 &hwloc_x86_disc_component
1232 };
1233