/*
 * Copyright © 2010-2017 Inria.  All rights reserved.
 * Copyright © 2010-2013 Université Bordeaux
 * Copyright © 2010-2011 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 *
 *
 * This backend is only used when the operating system does not export
 * the necessary hardware topology information to user-space applications.
 * Currently, only the FreeBSD backend relies on this x86 backend.
 *
 * Other backends such as Linux have their own way to retrieve various
 * pieces of hardware topology information from the operating system
 * on various architectures, without having to use this x86-specific code.
 */

#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#include <private/misc.h>

#include <private/cpuid-x86.h>

#ifdef HAVE_VALGRIND_VALGRIND_H
#include <valgrind/valgrind.h>
#endif

struct hwloc_x86_backend_data_s {
  unsigned nbprocs;
  hwloc_bitmap_t apicid_set;
  int apicid_unique;
  int is_knl;
};

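/* A note on the feature-word indexing used by the two macros below
 * (see hwloc_look_x86() where features[] is filled):
 * features[4] is CPUID 0x1 ECX, whose bit 21 is x2APIC;
 * features[6] is CPUID 0x80000001 ECX, whose bit 22 is TOPOEXT. */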
#define has_topoext(features) ((features)[6] & (1 << 22))
#define has_x2apic(features) ((features)[4] & (1 << 21))

struct cacheinfo {
  unsigned type;
  unsigned level;
  unsigned nbthreads_sharing;
  unsigned cacheid;

  unsigned linesize;
  unsigned linepart;
  int inclusive;
  int ways; /* -1 means fully associative */
  unsigned sets;
  unsigned long size;
};

struct procinfo {
  unsigned present;
  unsigned apicid;
  unsigned max_log_proc;
  unsigned max_nbcores;
  unsigned max_nbthreads;
  unsigned packageid;
  unsigned nodeid;
  unsigned unitid;
  unsigned logprocid;
  unsigned threadid;
  unsigned coreid;
  unsigned *otherids;
  unsigned levels;
  unsigned numcaches;
  struct cacheinfo *cache;
  char cpuvendor[13]; /* 12 chars from cpuid 0x0 + ending \0 */
  char cpumodel[3*4*4+1]; /* 3 leaves x 4 registers x 4 bytes + ending \0 */
  unsigned cpustepping;
  unsigned cpumodelnumber;
  unsigned cpufamilynumber;
};

enum cpuid_type {
  intel,
  amd,
  unknown
};

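/* Illustrative example (hypothetical register value) of the decoding done in
 * fill_amd_cache() below: if cpuid leaf 0x80000006 returns ECX=0x04008140 for
 * the L2, then size = (0x0400 << 10) = 1MB, linesize = 0x40 = 64 bytes, and
 * ways = ways_tab[(0x04008140 >> 12) & 0xf] = ways_tab[8] = 16. */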
static void fill_amd_cache(struct procinfo *infos, unsigned level, int type, unsigned cpuid)
{
  struct cacheinfo *cache, *tmpcaches;
  unsigned cachenum;
  unsigned long size = 0;

  if (level == 1)
    size = ((cpuid >> 24)) << 10;
  else if (level == 2)
    size = ((cpuid >> 16)) << 10;
  else if (level == 3)
    size = ((cpuid >> 18)) << 19;
  if (!size)
    return;

  tmpcaches = realloc(infos->cache, (infos->numcaches+1)*sizeof(*infos->cache));
  if (!tmpcaches)
    /* failed to allocate, ignore that cache */
    return;
  infos->cache = tmpcaches;
  cachenum = infos->numcaches++;

  cache = &infos->cache[cachenum];

  cache->type = type;
  cache->level = level;
  if (level <= 2)
    cache->nbthreads_sharing = 1;
  else
    cache->nbthreads_sharing = infos->max_log_proc;
  cache->linesize = cpuid & 0xff;
  cache->linepart = 0;
  cache->inclusive = 0; /* old AMD (K8-K10) is supposed to have exclusive caches */

  if (level == 1) {
    cache->ways = (cpuid >> 16) & 0xff;
    if (cache->ways == 0xff)
      /* Fully associative */
      cache->ways = -1;
  } else {
    static const unsigned ways_tab[] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, -1 };
    unsigned ways = (cpuid >> 12) & 0xf;
    cache->ways = ways_tab[ways];
  }
  cache->size = size;
  cache->sets = 0;

  hwloc_debug("cache L%u t%u linesize %u ways %u size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
}

/* Fetch information from the processor itself thanks to cpuid and store it in
 * infos for summarize to analyze them globally */
static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  unsigned eax, ebx, ecx = 0, edx;
  unsigned cachenum;
  struct cacheinfo *cache;
  unsigned regs[4];
  unsigned _model, _extendedmodel, _family, _extendedfamily;

  infos->present = 1;

  /* on return from this function, the following fields must be set in infos:
   * packageid, nodeid, unitid, coreid, threadid, or -1
   * apicid
   * levels and levels slots in otherids[]
   * numcaches and numcaches slots in caches[]
   *
   * max_log_proc, max_nbthreads, max_nbcores, logprocid
   * are only used temporarily inside this function and its callees.
   */

  /* Get apicid, max_log_proc, packageid, logprocid from cpuid 0x01 */
  eax = 0x01;
  hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
  infos->apicid = ebx >> 24;
  if (edx & (1 << 28))
    infos->max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1);
  else
    infos->max_log_proc = 1;
  hwloc_debug("APIC ID 0x%02x max_log_proc %u\n", infos->apicid, infos->max_log_proc);
  infos->packageid = infos->apicid / infos->max_log_proc;
  infos->logprocid = infos->apicid % infos->max_log_proc;
  hwloc_debug("phys %u thread %u\n", infos->packageid, infos->logprocid);
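  /* Illustrative example (hypothetical values): apicid 0x5 with
   * max_log_proc 4 decomposes into packageid 5/4 = 1 and logprocid 5%4 = 1. */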

  /* Get cpu model/family/stepping numbers from same cpuid */
  _model          = (eax>>4) & 0xf;
  _extendedmodel  = (eax>>16) & 0xf;
  _family         = (eax>>8) & 0xf;
  _extendedfamily = (eax>>20) & 0xff;
  if ((cpuid_type == intel || cpuid_type == amd) && _family == 0xf) {
    infos->cpufamilynumber = _family + _extendedfamily;
  } else {
    infos->cpufamilynumber = _family;
  }
  if ((cpuid_type == intel && (_family == 0x6 || _family == 0xf))
      || (cpuid_type == amd && _family == 0xf)) {
    infos->cpumodelnumber = _model + (_extendedmodel << 4);
  } else {
    infos->cpumodelnumber = _model;
  }
  infos->cpustepping = eax & 0xf;
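  /* Illustrative example (hypothetical leaf 0x1 EAX=0x000306C3 on Intel):
   * _family = 0x6, _extendedmodel = 0x3, _model = 0xC, so
   * cpumodelnumber = 0xC + (0x3 << 4) = 0x3C and cpustepping = 3. */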

  if (cpuid_type == intel && infos->cpufamilynumber == 0x6 &&
      (infos->cpumodelnumber == 0x57 || infos->cpumodelnumber == 0x85))
    data->is_knl = 1; /* KNM is the same as KNL */

  /* Get cpu vendor string from cpuid 0x00 */
  memset(regs, 0, sizeof(regs));
  regs[0] = 0;
  hwloc_x86_cpuid(&regs[0], &regs[1], &regs[3], &regs[2]); /* ebx,edx,ecx order gives the 12-char vendor string */
  memcpy(infos->cpuvendor, regs+1, 4*3);
  /* infos was calloc'ed, already ends with \0 */

  /* Get cpu model string from cpuid 0x80000002-4 */
  if (highest_ext_cpuid >= 0x80000004) {
    memset(regs, 0, sizeof(regs));
    regs[0] = 0x80000002;
    hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
    memcpy(infos->cpumodel, regs, 4*4);
    regs[0] = 0x80000003;
    hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
    memcpy(infos->cpumodel + 4*4, regs, 4*4);
    regs[0] = 0x80000004;
    hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
    memcpy(infos->cpumodel + 4*4*2, regs, 4*4);
    /* infos was calloc'ed, already ends with \0 */
  }

  /* Get core/thread information from cpuid 0x80000008
   * (not supported on Intel)
   */
  if (cpuid_type != intel && highest_ext_cpuid >= 0x80000008) {
    unsigned coreidsize;
    eax = 0x80000008;
    hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
    coreidsize = (ecx >> 12) & 0xf;
    hwloc_debug("core ID size: %u\n", coreidsize);
    if (!coreidsize) {
      infos->max_nbcores = (ecx & 0xff) + 1;
    } else
      infos->max_nbcores = 1 << coreidsize;
    hwloc_debug("Thus max # of cores: %u\n", infos->max_nbcores);
    /* Still no multithreaded AMD */
    infos->max_nbthreads = 1;
    hwloc_debug("and max # of threads: %u\n", infos->max_nbthreads);
    /* The legacy max_log_proc is deprecated, it can be smaller than max_nbcores,
     * which is the maximum number of cores that the processor could theoretically support
     * (see "Multiple Core Calculation" in the AMD CPUID specification).
     * Recompute packageid/logprocid/threadid/coreid accordingly.
     */
    infos->packageid = infos->apicid / infos->max_nbcores;
    infos->logprocid = infos->apicid % infos->max_nbcores;
    infos->threadid = infos->logprocid % infos->max_nbthreads;
    infos->coreid = infos->logprocid / infos->max_nbthreads;
    hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
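    /* Illustrative example (hypothetical values): coreidsize 4 gives
     * max_nbcores 16; apicid 0x13 then yields packageid 1, coreid 3, threadid 0. */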
  }

  infos->numcaches = 0;
  infos->cache = NULL;

  /* Get apicid, nodeid, unitid from cpuid 0x8000001e
   * and cache information from cpuid 0x8000001d
   * (AMD topology extension)
   */
  if (cpuid_type != intel && has_topoext(features)) {
    unsigned apic_id, node_id, nodes_per_proc;

    eax = 0x8000001e;
    hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
    infos->apicid = apic_id = eax;

    if (infos->cpufamilynumber == 0x16) {
      /* ecx is reserved */
      node_id = 0;
      nodes_per_proc = 1;
    } else {
      node_id = ecx & 0xff;
      nodes_per_proc = ((ecx >> 8) & 7) + 1;
    }
    infos->nodeid = node_id;
    if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
        || (infos->cpufamilynumber == 0x17 && nodes_per_proc > 4)) {
      hwloc_debug("warning: undefined nodes_per_proc value %d, assuming it means %d\n", nodes_per_proc, nodes_per_proc);
    }

    if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */
      unsigned unit_id, cores_per_unit;
      infos->unitid = unit_id = ebx & 0xff;
      cores_per_unit = ((ebx >> 8) & 0xff) + 1;
      hwloc_debug("topoext %08x, %d nodes, node %d, %d cores in unit %d\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id);
    } else {
      unsigned core_id, threads_per_core;
      infos->coreid = core_id = ebx & 0xff;
      threads_per_core = ((ebx >> 8) & 0xff) + 1;
      hwloc_debug("topoext %08x, %d nodes, node %d, %d threads in core %d\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id);
    }

    for (cachenum = 0; ; cachenum++) {
      unsigned type;
      eax = 0x8000001d;
      ecx = cachenum;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
      type = eax & 0x1f;
      if (type == 0)
        break;
      infos->numcaches++;
    }

    cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
    if (!cache) {
      /* failed to allocate, ignore these caches */
      infos->numcaches = 0;
    } else for (cachenum = 0; ; cachenum++) {
      unsigned long linesize, linepart, ways, sets;
      unsigned type;
      eax = 0x8000001d;
      ecx = cachenum;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);

      type = eax & 0x1f;

      if (type == 0)
        break;

      cache->type = type;
      cache->level = (eax >> 5) & 0x7;
      /* Note: this is actually the number of cores */
      cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;

      cache->linesize = linesize = (ebx & 0xfff) + 1;
      cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
      ways = ((ebx >> 22) & 0x3ff) + 1;

      if (eax & (1 << 9))
        /* Fully associative */
        cache->ways = -1;
      else
        cache->ways = ways;
      cache->sets = sets = ecx + 1;
      cache->size = linesize * linepart * ways * sets;
      cache->inclusive = edx & 0x2;
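      /* Illustrative example (hypothetical values): 64-byte lines, 1 partition,
       * 8 ways, 64 sets give a 64*1*8*64 = 32KB cache. */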

      hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);

      cache++;
    }
  } else {
    /* If there's no topoext,
     * get cache information from cpuid 0x80000005 and 0x80000006
     * (not supported on Intel)
     */
    if (cpuid_type != intel && highest_ext_cpuid >= 0x80000005) {
      eax = 0x80000005;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
      fill_amd_cache(infos, 1, 1, ecx); /* L1d */
      fill_amd_cache(infos, 1, 2, edx); /* L1i */
    }
    if (cpuid_type != intel && highest_ext_cpuid >= 0x80000006) {
      eax = 0x80000006;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
      if (ecx & 0xf000)
        /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11.
         * Could be useful if some Intels (at least before Core micro-architecture)
         * support this leaf without leaf 0x4.
         */
        fill_amd_cache(infos, 2, 3, ecx); /* L2u */
      if (edx & 0xf000)
        fill_amd_cache(infos, 3, 3, edx); /* L3u */
    }
  }

  /* Get thread/core + cache information from cpuid 0x04
   * (not supported on AMD)
   */
  if (cpuid_type != amd && highest_cpuid >= 0x04) {
    unsigned level;
    for (cachenum = 0; ; cachenum++) {
      unsigned type;
      eax = 0x04;
      ecx = cachenum;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);

      type = eax & 0x1f;

      hwloc_debug("cache %u type %u\n", cachenum, type);

      if (type == 0)
        break;
      level = (eax >> 5) & 0x7;
      if (data->is_knl && level == 3)
        /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
        break;
      infos->numcaches++;

      if (!cachenum) {
        /* by the way, get thread/core information from the first cache */
        infos->max_nbcores = ((eax >> 26) & 0x3f) + 1;
        infos->max_nbthreads = infos->max_log_proc / infos->max_nbcores;
        hwloc_debug("thus %u threads\n", infos->max_nbthreads);
        infos->threadid = infos->logprocid % infos->max_nbthreads;
        infos->coreid = infos->logprocid / infos->max_nbthreads;
        hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
      }
    }

    cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
    if (!cache) {
      /* failed to allocate, ignore these caches */
      infos->numcaches = 0;
    } else for (cachenum = 0; ; cachenum++) {
      unsigned long linesize, linepart, ways, sets;
      unsigned type;
      eax = 0x04;
      ecx = cachenum;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);

      type = eax & 0x1f;

      if (type == 0)
        break;
      level = (eax >> 5) & 0x7;
      if (data->is_knl && level == 3)
        /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
        break;

      cache->type = type;
      cache->level = level;
      cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;

      cache->linesize = linesize = (ebx & 0xfff) + 1;
      cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
      ways = ((ebx >> 22) & 0x3ff) + 1;
      if (eax & (1 << 9))
        /* Fully associative */
        cache->ways = -1;
      else
        cache->ways = ways;
      cache->sets = sets = ecx + 1;
      cache->size = linesize * linepart * ways * sets;
      cache->inclusive = edx & 0x2;

      hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);

      cache++;
    }
  }

  /* Get package/core/thread information from cpuid 0x0b
   * (Intel x2APIC)
   */
  if (cpuid_type == intel && highest_cpuid >= 0x0b && has_x2apic(features)) {
    unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
    for (level = 0; ; level++) {
      ecx = level;
      eax = 0x0b;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
      if (!eax && !ebx)
        break;
    }
    if (level) {
      infos->levels = level;
      infos->otherids = malloc(level * sizeof(*infos->otherids));
      if (infos->otherids) /* on allocation failure, skip gathering these levels */
      for (level = 0; ; level++) {
        ecx = level;
        eax = 0x0b;
        hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
        if (!eax && !ebx)
          break;
        apic_nextshift = eax & 0x1f;
        apic_number = ebx & 0xffff;
        apic_type = (ecx & 0xff00) >> 8;
        apic_id = edx;
        id = (apic_id >> apic_shift) & ((1 << (apic_nextshift - apic_shift)) - 1);
        hwloc_debug("x2APIC %08x %d: nextshift %d num %2d type %d id %2d\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
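        /* Illustrative example (hypothetical values): with apic_shift 0 and
         * apic_nextshift 1 at the SMT level, id is bit 0 of the APIC id;
         * the next level then starts extracting at bit 1. */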
        infos->apicid = apic_id;
        infos->otherids[level] = UINT_MAX;
        switch (apic_type) {
        case 1:
          infos->threadid = id;
          break;
        case 2:
          infos->coreid = id;
          break;
        default:
          hwloc_debug("x2APIC %d: unknown type %d\n", level, apic_type);
          infos->otherids[level] = apic_id >> apic_shift;
          break;
        }
        apic_shift = apic_nextshift;
      }
      infos->apicid = apic_id;
      infos->packageid = apic_id >> apic_shift;
      hwloc_debug("x2APIC remainder: %d\n", infos->packageid);
      hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
    }
  }

  /* Now that we have all info, compute cacheids and apply quirks */
  for (cachenum = 0; cachenum < infos->numcaches; cachenum++) {
    cache = &infos->cache[cachenum];

    /* default cacheid value */
    cache->cacheid = infos->apicid / cache->nbthreads_sharing;

    /* AMD quirk */
    if (cpuid_type == amd
        && infos->cpufamilynumber == 0x10 && infos->cpumodelnumber == 0x9
        && cache->level == 3
        && (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) {
      /* Fix AMD family 0x10 model 0x9 (Magny-Cours) with 8 or 12 cores.
       * The L3 (and its associativity) is actually split into two halves.
       */
      if (cache->nbthreads_sharing == 16)
        cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */
      cache->nbthreads_sharing /= 2;
      cache->size /= 2;
      if (cache->ways != -1)
        cache->ways /= 2;
      /* AMD Magny-Cours 12-core processors reserve APIC ids as AAAAAABBBBBB....
       * among the first L3 (A), the second L3 (B), and nonexistent cores (.).
       * On multi-socket servers, L3s in non-first sockets may have APIC id ranges
       * such as [16-21] that are not aligned on multiples of nbthreads_sharing (6).
       * That means we can't just compare apicid/nbthreads_sharing to identify siblings.
       */
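      /* Illustrative example (hypothetical layout): on a second 12-core package
       * with max_log_proc 16, apicids 16-21 get cacheid 2 below while
       * apicids 22-27 get cacheid 3. */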
      cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
        + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */

    } else if (cpuid_type == amd
               && infos->cpufamilynumber == 0x15
               && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
               && cache->level == 3 && cache->nbthreads_sharing == 6) {
      /* AMD Bulldozer and Piledriver 12-core processors have the same APIC ids as Magny-Cours above,
       * but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here.
       */
      cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
        + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */
    }
  }

  if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
    data->apicid_unique = 0;
  else
    hwloc_bitmap_set(data->apicid_set, infos->apicid);
}

static void
hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int nodup)
{
  char number[8];
  hwloc_obj_add_info_nodup(obj, "CPUVendor", info->cpuvendor, nodup);
  snprintf(number, sizeof(number), "%u", info->cpufamilynumber);
  hwloc_obj_add_info_nodup(obj, "CPUFamilyNumber", number, nodup);
  snprintf(number, sizeof(number), "%u", info->cpumodelnumber);
  hwloc_obj_add_info_nodup(obj, "CPUModelNumber", number, nodup);
  if (info->cpumodel[0]) {
    const char *c = info->cpumodel;
    while (*c == ' ')
      c++;
    hwloc_obj_add_info_nodup(obj, "CPUModel", c, nodup);
  }
  snprintf(number, sizeof(number), "%u", info->cpustepping);
  hwloc_obj_add_info_nodup(obj, "CPUStepping", number, nodup);
}

/* Analyse information stored in infos, and build/annotate topology levels accordingly */
static int summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  unsigned nbprocs = data->nbprocs;
  hwloc_bitmap_t complete_cpuset = hwloc_bitmap_alloc();
  unsigned i, j, l, level, type;
  unsigned nbpackages = 0;
  int one = -1;
  unsigned next_group_depth = topology->next_group_depth;
  int caches_added = 0;
  hwloc_bitmap_t remaining_cpuset;

  for (i = 0; i < nbprocs; i++)
    if (infos[i].present) {
      hwloc_bitmap_set(complete_cpuset, i);
      one = i;
    }

  if (one == -1) {
    hwloc_bitmap_free(complete_cpuset);
    return 0;
  }

  remaining_cpuset = hwloc_bitmap_alloc();

  /* Ideally, when fulldiscovery=0, we could add any object that doesn't exist yet.
   * But what if the x86 and the native backends disagree because one is buggy? Which one to trust?
   * Only annotate existing objects for now.
   */

  /* Look for packages */
  if (fulldiscovery) {
    hwloc_bitmap_t package_cpuset;
    hwloc_obj_t package;

    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      unsigned packageid = infos[i].packageid;

      package_cpuset = hwloc_bitmap_alloc();
      for (j = i; j < nbprocs; j++) {
        if (infos[j].packageid == packageid) {
          hwloc_bitmap_set(package_cpuset, j);
          hwloc_bitmap_clr(remaining_cpuset, j);
        }
      }
      package = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, packageid);
      package->cpuset = package_cpuset;

      hwloc_x86_add_cpuinfos(package, &infos[i], 0);

      hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n",
          packageid, package_cpuset);
      hwloc_insert_object_by_cpuset(topology, package);
      nbpackages++;
    }

  } else {
    /* Annotate previously-existing packages */
    hwloc_obj_t package = NULL;
    int same = 1;
    nbpackages = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
    /* check whether all packages have the same info */
    for(i=1; i<nbprocs; i++) {
      if (strcmp(infos[i].cpumodel, infos[0].cpumodel)) {
        same = 0;
        break;
      }
    }
    /* now iterate over packages and annotate them */
    while ((package = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PACKAGE, package)) != NULL) {
      if (package->os_index == (unsigned) -1) {
        /* try to fix the package OS index if unknown.
         * FIXME: ideally, we should check all bits in case x86 and the native backend disagree.
         */
        for(i=0; i<nbprocs; i++) {
          if (hwloc_bitmap_isset(package->cpuset, i)) {
            package->os_index = infos[i].packageid;
            break;
          }
        }
      }
      for(i=0; i<nbprocs; i++) {
        /* if there's a single package, it's the one we want.
         * if the index is ok, it's the one we want.
         * if the index is unknown but all packages have the same id, that's fine
         */
        if (nbpackages == 1 || infos[i].packageid == package->os_index || (same && package->os_index == (unsigned) -1)) {
          hwloc_x86_add_cpuinfos(package, &infos[i], 1);
          break;
        }
      }
    }
  }
  /* If there was no package, annotate the Machine instead */
  if ((!nbpackages) && infos[0].cpumodel[0]) {
    hwloc_x86_add_cpuinfos(hwloc_get_root_obj(topology), &infos[0], 1);
  }

  /* Look for NUMA nodes inside packages */
  if (fulldiscovery) {
    hwloc_bitmap_t node_cpuset;
    hwloc_obj_t node;

    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      unsigned packageid = infos[i].packageid;
      unsigned nodeid = infos[i].nodeid;

      if (nodeid == (unsigned)-1) {
        hwloc_bitmap_clr(remaining_cpuset, i);
        continue;
      }

      node_cpuset = hwloc_bitmap_alloc();
      for (j = i; j < nbprocs; j++) {
        if (infos[j].nodeid == (unsigned) -1) {
          hwloc_bitmap_clr(remaining_cpuset, j);
          continue;
        }

        if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) {
          hwloc_bitmap_set(node_cpuset, j);
          hwloc_bitmap_clr(remaining_cpuset, j);
        }
      }
      node = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, nodeid);
      node->cpuset = node_cpuset;
      node->nodeset = hwloc_bitmap_alloc();
      hwloc_bitmap_set(node->nodeset, nodeid);
      hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
          nodeid, node_cpuset);
      hwloc_insert_object_by_cpuset(topology, node);
    }
  }

  /* Look for Compute units inside packages */
  if (fulldiscovery) {
    hwloc_bitmap_t unit_cpuset;
    hwloc_obj_t unit;

    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      unsigned packageid = infos[i].packageid;
      unsigned unitid = infos[i].unitid;

      if (unitid == (unsigned)-1) {
        hwloc_bitmap_clr(remaining_cpuset, i);
        continue;
      }

      unit_cpuset = hwloc_bitmap_alloc();
      for (j = i; j < nbprocs; j++) {
        if (infos[j].unitid == (unsigned) -1) {
          hwloc_bitmap_clr(remaining_cpuset, j);
          continue;
        }

        if (infos[j].packageid == packageid && infos[j].unitid == unitid) {
          hwloc_bitmap_set(unit_cpuset, j);
          hwloc_bitmap_clr(remaining_cpuset, j);
        }
      }
      unit = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unitid);
      unit->cpuset = unit_cpuset;
      hwloc_obj_add_info(unit, "Type", "ComputeUnit");
      hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n",
          unitid, unit_cpuset);
      hwloc_insert_object_by_cpuset(topology, unit);
    }
  }

  /* Look for unknown objects */
  if (infos[one].otherids) {
    for (level = infos[one].levels-1; level <= infos[one].levels-1; level--) { /* downward loop: relies on unsigned wrap-around to stop after level 0 */
      if (infos[one].otherids[level] != UINT_MAX) {
        hwloc_bitmap_t unknown_cpuset;
        hwloc_obj_t unknown_obj;

        hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
        while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
          unsigned unknownid = infos[i].otherids[level];

          unknown_cpuset = hwloc_bitmap_alloc();
          for (j = i; j < nbprocs; j++) {
            if (infos[j].otherids[level] == unknownid) {
              hwloc_bitmap_set(unknown_cpuset, j);
              hwloc_bitmap_clr(remaining_cpuset, j);
            }
          }
          unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unknownid);
          unknown_obj->cpuset = unknown_cpuset;
          unknown_obj->os_level = level;
          unknown_obj->attr->group.depth = topology->next_group_depth + level;
          if (next_group_depth <= topology->next_group_depth + level)
            next_group_depth = topology->next_group_depth + level + 1;
          hwloc_debug_2args_bitmap("os unknown%d %u has cpuset %s\n",
              level, unknownid, unknown_cpuset);
          hwloc_insert_object_by_cpuset(topology, unknown_obj);
        }
      }
    }
  }

  /* Look for cores */
  if (fulldiscovery) {
    hwloc_bitmap_t core_cpuset;
    hwloc_obj_t core;

    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      unsigned packageid = infos[i].packageid;
      unsigned nodeid = infos[i].nodeid;
      unsigned coreid = infos[i].coreid;

      if (coreid == (unsigned) -1) {
        hwloc_bitmap_clr(remaining_cpuset, i);
        continue;
      }

      core_cpuset = hwloc_bitmap_alloc();
      for (j = i; j < nbprocs; j++) {
        if (infos[j].coreid == (unsigned) -1) {
          hwloc_bitmap_clr(remaining_cpuset, j);
          continue;
        }

        if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) {
          hwloc_bitmap_set(core_cpuset, j);
          hwloc_bitmap_clr(remaining_cpuset, j);
        }
      }
      core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, coreid);
      core->cpuset = core_cpuset;
      hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
          coreid, core_cpuset);
      hwloc_insert_object_by_cpuset(topology, core);
    }
  }

  /* Look for PUs */
  if (fulldiscovery) {
    hwloc_debug("%s", "\n\n * CPU cpusets *\n\n");
    for (i=0; i<nbprocs; i++)
      if(infos[i].present) { /* Only add present PUs. We don't know if others actually exist */
       struct hwloc_obj *obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, i);
       obj->cpuset = hwloc_bitmap_alloc();
       hwloc_bitmap_only(obj->cpuset, i);
       hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset);
       hwloc_insert_object_by_cpuset(topology, obj);
     }
  }

  /* Look for caches */
  /* First find max level */
  level = 0;
  for (i = 0; i < nbprocs; i++)
    for (j = 0; j < infos[i].numcaches; j++)
      if (infos[i].cache[j].level > level)
        level = infos[i].cache[j].level;
  while (level > 0) {
    for (type = 1; type <= 3; type++) {
      /* Look for caches of that type at this level */
      {
        hwloc_obj_t cache;

        hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
        while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
          hwloc_bitmap_t puset;
          int depth;

          for (l = 0; l < infos[i].numcaches; l++) {
            if (infos[i].cache[l].level == level && infos[i].cache[l].type == type)
              break;
          }
          if (l == infos[i].numcaches) {
            /* proc i has no cache of that type at this level */
            hwloc_bitmap_clr(remaining_cpuset, i);
            continue;
          }

          puset = hwloc_bitmap_alloc();
          hwloc_bitmap_set(puset, i);
          depth = hwloc_get_cache_type_depth(topology, level,
                                             type == 1 ? HWLOC_OBJ_CACHE_DATA : type == 2 ? HWLOC_OBJ_CACHE_INSTRUCTION : HWLOC_OBJ_CACHE_UNIFIED);
          if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
            cache = hwloc_get_next_obj_covering_cpuset_by_depth(topology, puset, depth, NULL);
          else
            cache = NULL;
          hwloc_bitmap_free(puset);

          if (cache) {
            /* Found a cache above that PU, annotate it if it has no such attribute yet */
            if (!hwloc_obj_get_info_by_name(cache, "Inclusive"))
              hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
            hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, cache->cpuset);
          } else {
            /* Add the missing cache */
            hwloc_bitmap_t cache_cpuset;
            unsigned packageid = infos[i].packageid;
            unsigned cacheid = infos[i].cache[l].cacheid;
            /* Now look for others sharing it */
            cache_cpuset = hwloc_bitmap_alloc();
            for (j = i; j < nbprocs; j++) {
              unsigned l2;
              for (l2 = 0; l2 < infos[j].numcaches; l2++) {
                if (infos[j].cache[l2].level == level && infos[j].cache[l2].type == type)
                  break;
              }
              if (l2 == infos[j].numcaches) {
                /* proc j has no cache of that type at this level */
                hwloc_bitmap_clr(remaining_cpuset, j);
                continue;
              }
              if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) {
                hwloc_bitmap_set(cache_cpuset, j);
                hwloc_bitmap_clr(remaining_cpuset, j);
              }
            }
            cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, cacheid);
            cache->attr->cache.depth = level;
            cache->attr->cache.size = infos[i].cache[l].size;
            cache->attr->cache.linesize = infos[i].cache[l].linesize;
            cache->attr->cache.associativity = infos[i].cache[l].ways;
            switch (infos[i].cache[l].type) {
              case 1:
                cache->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
                break;
              case 2:
                cache->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
                break;
              case 3:
                cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
                break;
            }
            cache->cpuset = cache_cpuset;
            hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
            hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n",
                level, cacheid, cache_cpuset);
            hwloc_insert_object_by_cpuset(topology, cache);
            caches_added++;
          }
        }
      }
    }
    level--;
  }

  hwloc_bitmap_free(remaining_cpuset);
  hwloc_bitmap_free(complete_cpuset);
  topology->next_group_depth = next_group_depth;

  return fulldiscovery || caches_added;
}

static int
look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery,
           unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type,
           int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags),
           int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags))
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  struct hwloc_topology *topology = backend->topology;
  unsigned nbprocs = data->nbprocs;
  hwloc_bitmap_t orig_cpuset = hwloc_bitmap_alloc();
  hwloc_bitmap_t set;
  unsigned i;
  int ret = 0;

  if (get_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) {
    hwloc_bitmap_free(orig_cpuset);
    return -1;
  }

  set = hwloc_bitmap_alloc();

  for (i = 0; i < nbprocs; i++) {
    hwloc_bitmap_only(set, i);
    hwloc_debug("binding to CPU%d\n", i);
    if (set_cpubind(topology, set, HWLOC_CPUBIND_STRICT)) {
      hwloc_debug("could not bind to CPU%d: %s\n", i, strerror(errno));
      continue;
    }
    look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type);
  }

  set_cpubind(topology, orig_cpuset, 0);
  hwloc_bitmap_free(set);
  hwloc_bitmap_free(orig_cpuset);

  if (!data->apicid_unique)
    fulldiscovery = 0;
  else
    ret = summarize(backend, infos, fulldiscovery);
  return ret;
}

#if defined HWLOC_FREEBSD_SYS && defined HAVE_CPUSET_SETID
#include <sys/param.h>
#include <sys/cpuset.h>
typedef cpusetid_t hwloc_x86_os_state_t;
static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state)
{
  /* temporarily make all cpus available during discovery */
  cpuset_getid(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, state);
  cpuset_setid(CPU_WHICH_PID, -1, 0);
}
static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state)
{
  /* restore initial cpuset */
  cpuset_setid(CPU_WHICH_PID, -1, *state);
}
#else /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
typedef void * hwloc_x86_os_state_t;
static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state __hwloc_attribute_unused) { }
static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state __hwloc_attribute_unused) { }
#endif /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */

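/* cpuid leaf 0x0 returns the vendor string as the 12 bytes of EBX, EDX, ECX
 * (in that order), spelling "GenuineIntel" or "AuthenticAMD". */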
#define INTEL_EBX ('G' | ('e'<<8) | ('n'<<16) | ('u'<<24))
#define INTEL_EDX ('i' | ('n'<<8) | ('e'<<16) | ('I'<<24))
#define INTEL_ECX ('n' | ('t'<<8) | ('e'<<16) | ('l'<<24))

#define AMD_EBX ('A' | ('u'<<8) | ('t'<<16) | ('h'<<24))
#define AMD_EDX ('e' | ('n'<<8) | ('t'<<16) | ('i'<<24))
#define AMD_ECX ('c' | ('A'<<8) | ('M'<<16) | ('D'<<24))

/* fake cpubind for when nbprocs=1 and no binding support */
static int fake_get_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
                            hwloc_cpuset_t set __hwloc_attribute_unused,
                            int flags __hwloc_attribute_unused)
{
  return 0;
}
static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
                            hwloc_const_cpuset_t set __hwloc_attribute_unused,
                            int flags __hwloc_attribute_unused)
{
  return 0;
}

static
int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  unsigned nbprocs = data->nbprocs;
  unsigned eax, ebx, ecx = 0, edx;
  unsigned i;
  unsigned highest_cpuid;
  unsigned highest_ext_cpuid;
  /* This stores cpuid features with the same indexing as Linux */
  unsigned features[10] = { 0 };
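  /* For reference (matching the assignments below): features[0] is leaf 0x1 EDX,
   * features[4] is leaf 0x1 ECX, features[1] and features[6] are leaf 0x80000001
   * EDX/ECX, and features[9] is leaf 0x7 EBX. */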
  struct procinfo *infos = NULL;
  enum cpuid_type cpuid_type = unknown;
  hwloc_x86_os_state_t os_state;
  struct hwloc_binding_hooks hooks;
  struct hwloc_topology_support support;
  struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused;
  int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
  int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
  int ret = -1;

  /* check if binding works */
  memset(&hooks, 0, sizeof(hooks));
  support.membind = &memsupport;
  hwloc_set_native_binding_hooks(&hooks, &support);
  if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) {
    get_cpubind = hooks.get_thisthread_cpubind;
    set_cpubind = hooks.set_thisthread_cpubind;
  } else if (hooks.get_thisproc_cpubind && hooks.set_thisproc_cpubind) {
    get_cpubind = hooks.get_thisproc_cpubind;
    set_cpubind = hooks.set_thisproc_cpubind;
  } else {
    /* we need binding support if there are multiple PUs */
    if (nbprocs > 1)
      goto out;
    get_cpubind = fake_get_cpubind;
    set_cpubind = fake_set_cpubind;
  }

  if (!hwloc_have_x86_cpuid())
    goto out;

  infos = calloc(nbprocs, sizeof(struct procinfo));
  if (NULL == infos)
    goto out;
  for (i = 0; i < nbprocs; i++) {
    infos[i].nodeid = (unsigned) -1;
    infos[i].packageid = (unsigned) -1;
    infos[i].unitid = (unsigned) -1;
    infos[i].coreid = (unsigned) -1;
    infos[i].threadid = (unsigned) -1;
  }

  eax = 0x00;
  hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
  highest_cpuid = eax;
  if (ebx == INTEL_EBX && ecx == INTEL_ECX && edx == INTEL_EDX)
    cpuid_type = intel;
  if (ebx == AMD_EBX && ecx == AMD_ECX && edx == AMD_EDX)
    cpuid_type = amd;

  hwloc_debug("highest cpuid %x, cpuid type %u\n", highest_cpuid, cpuid_type);
  if (highest_cpuid < 0x01) {
    goto out_with_infos;
  }

  eax = 0x01;
  hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
  features[0] = edx;
  features[4] = ecx;

  eax = 0x80000000;
  hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
  highest_ext_cpuid = eax;

  hwloc_debug("highest extended cpuid %x\n", highest_ext_cpuid);

  if (highest_cpuid >= 0x7) {
    eax = 0x7;
    ecx = 0;
    hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
    features[9] = ebx;
  }

  if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) {
    eax = 0x80000001;
    hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
    features[1] = edx;
    features[6] = ecx;
  }

  hwloc_x86_os_state_save(&os_state);

  ret = look_procs(backend, infos, fulldiscovery,
                   highest_cpuid, highest_ext_cpuid, features, cpuid_type,
                   get_cpubind, set_cpubind);
  if (ret >= 0)
    /* success, we're done */
    goto out_with_os_state;

  if (nbprocs == 1) {
    /* only one processor, no need to bind */
    look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type);
    ret = summarize(backend, infos, fulldiscovery);
  }

out_with_os_state:
  hwloc_x86_os_state_restore(&os_state);

out_with_infos:
  if (NULL != infos) {
    for (i = 0; i < nbprocs; i++) {
      free(infos[i].cache);
      if (infos[i].otherids)
        free(infos[i].otherids);
    }
    free(infos);
  }

out:
  return ret;
}

static int
hwloc_x86_discover(struct hwloc_backend *backend)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  struct hwloc_topology *topology = backend->topology;
  int alreadypus = 0;
  int ret;

#if HAVE_DECL_RUNNING_ON_VALGRIND
  if (RUNNING_ON_VALGRIND) {
    fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n");
    return 0;
  }
#endif

  data->nbprocs = hwloc_fallback_nbprocessors(topology);

  if (!topology->is_thissystem) {
    hwloc_debug("%s", "\nno x86 detection (not thissystem)\n");
    return 0;
  }

  if (topology->levels[0][0]->cpuset) {
    /* somebody else discovered things */
    if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
      /* only PUs were discovered, as many as we would have found ourselves; complete the topology with everything else */
      alreadypus = 1;
      goto fulldiscovery;
    }

    /* several object types were added, we can't easily complete, just do partial discovery */
    ret = hwloc_look_x86(backend, 0);
    if (ret)
      hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
    return ret;
  } else {
    /* topology is empty, initialize it */
    hwloc_alloc_obj_cpusets(topology->levels[0][0]);
  }

fulldiscovery:
  if (hwloc_look_x86(backend, 1) < 0) {
    /* if failed, create PUs */
    if (!alreadypus)
      hwloc_setup_pu_level(topology, data->nbprocs);
  }

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");

#ifdef HAVE_UNAME
  hwloc_add_uname_info(topology, NULL); /* we already know is_thissystem() is true */
#else
  /* uname isn't available, manually setup the "Architecture" info */
#ifdef HWLOC_X86_64_ARCH
  hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64");
#else
  hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86");
#endif
#endif
  return 1;
}

static void
hwloc_x86_backend_disable(struct hwloc_backend *backend)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  hwloc_bitmap_free(data->apicid_set);
  free(data);
}

static struct hwloc_backend *
hwloc_x86_component_instantiate(struct hwloc_disc_component *component,
                                const void *_data1 __hwloc_attribute_unused,
                                const void *_data2 __hwloc_attribute_unused,
                                const void *_data3 __hwloc_attribute_unused)
{
  struct hwloc_backend *backend;
  struct hwloc_x86_backend_data_s *data;

  backend = hwloc_backend_alloc(component);
  if (!backend)
    goto out;

  data = malloc(sizeof(*data));
  if (!data) {
    errno = ENOMEM;
    goto out_with_backend;
  }

  backend->private_data = data;
  backend->flags = HWLOC_BACKEND_FLAG_NEED_LEVELS;
  backend->discover = hwloc_x86_discover;
  backend->disable = hwloc_x86_backend_disable;

  /* default values */
  data->is_knl = 0;
  data->apicid_set = hwloc_bitmap_alloc();
  data->apicid_unique = 1;

  return backend;

 out_with_backend:
  free(backend);
 out:
  return NULL;
}

static struct hwloc_disc_component hwloc_x86_disc_component = {
  HWLOC_DISC_COMPONENT_TYPE_CPU,
  "x86",
  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
  hwloc_x86_component_instantiate,
  45, /* between native and no_os */
  NULL
};

const struct hwloc_component hwloc_x86_component = {
  HWLOC_COMPONENT_ABI,
  NULL, NULL,
  HWLOC_COMPONENT_TYPE_DISC,
  0,
  &hwloc_x86_disc_component
};