1 /*
2 * Copyright © 2009-2020 Inria. All rights reserved.
3 * See COPYING in top-level directory.
4 */
5
6 #include "private/autogen/config.h"
7 #include "hwloc.h"
8 #include "hwloc/plugins.h"
9 #include "private/private.h"
10 #include "private/debug.h"
11 #include "private/misc.h"
12
13 #include <fcntl.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17 #include <sys/stat.h>
18
19 #if defined(HWLOC_WIN_SYS) && !defined(__CYGWIN__)
20 #include <io.h>
21 #define open _open
22 #define read _read
23 #define close _close
24 #endif
25
26
27 /**************************************
28 * Init/Exit and Forced PCI localities
29 */
30
31 static void
hwloc_pci_forced_locality_parse_one(struct hwloc_topology * topology,const char * string,unsigned * allocated)32 hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology,
33 const char *string /* must contain a ' ' */,
34 unsigned *allocated)
35 {
36 unsigned nr = topology->pci_forced_locality_nr;
37 unsigned domain, bus_first, bus_last, dummy;
38 hwloc_bitmap_t set;
39 char *tmp;
40
41 if (sscanf(string, "%x:%x-%x %x", &domain, &bus_first, &bus_last, &dummy) == 4) {
42 /* fine */
43 } else if (sscanf(string, "%x:%x %x", &domain, &bus_first, &dummy) == 3) {
44 bus_last = bus_first;
45 } else if (sscanf(string, "%x %x", &domain, &dummy) == 2) {
46 bus_first = 0;
47 bus_last = 255;
48 } else
49 return;
50
51 tmp = strchr(string, ' ');
52 if (!tmp)
53 return;
54 tmp++;
55
56 set = hwloc_bitmap_alloc();
57 hwloc_bitmap_sscanf(set, tmp);
58
59 if (!*allocated) {
60 topology->pci_forced_locality = malloc(sizeof(*topology->pci_forced_locality));
61 if (!topology->pci_forced_locality)
62 goto out_with_set; /* failed to allocate, ignore this forced locality */
63 *allocated = 1;
64 } else if (nr >= *allocated) {
65 struct hwloc_pci_forced_locality_s *tmplocs;
66 tmplocs = realloc(topology->pci_forced_locality,
67 2 * *allocated * sizeof(*topology->pci_forced_locality));
68 if (!tmplocs)
69 goto out_with_set; /* failed to allocate, ignore this forced locality */
70 topology->pci_forced_locality = tmplocs;
71 *allocated *= 2;
72 }
73
74 topology->pci_forced_locality[nr].domain = domain;
75 topology->pci_forced_locality[nr].bus_first = bus_first;
76 topology->pci_forced_locality[nr].bus_last = bus_last;
77 topology->pci_forced_locality[nr].cpuset = set;
78 topology->pci_forced_locality_nr++;
79 return;
80
81 out_with_set:
82 hwloc_bitmap_free(set);
83 return;
84 }
85
/*
 * Split _env into entries separated by ';', '\r' or '\n' and hand each entry
 * to hwloc_pci_forced_locality_parse_one().
 *
 * The input is duplicated because the separators are overwritten with '\0'
 * while splitting. If the duplication fails, nothing is parsed.
 */
static void
hwloc_pci_forced_locality_parse(struct hwloc_topology *topology, const char *_env)
{
  char *env = strdup(_env);
  unsigned allocated = 0;
  char *tmp = env;

  if (!env)
    /* out of memory, ignore the forced localities */
    return;

  while (1) {
    size_t len = strcspn(tmp, ";\r\n");
    char *next = NULL;

    if (tmp[len] != '\0') {
      /* terminate this entry, and continue only if something follows the separator */
      tmp[len] = '\0';
      if (tmp[len+1] != '\0')
        next = &tmp[len]+1;
    }

    hwloc_pci_forced_locality_parse_one(topology, tmp, &allocated);

    if (next)
      tmp = next;
    else
      break;
  }

  free(env);
}
113
114 void
hwloc_pci_discovery_init(struct hwloc_topology * topology)115 hwloc_pci_discovery_init(struct hwloc_topology *topology)
116 {
117 topology->pci_has_forced_locality = 0;
118 topology->pci_forced_locality_nr = 0;
119 topology->pci_forced_locality = NULL;
120
121 topology->first_pci_locality = topology->last_pci_locality = NULL;
122 }
123
124 void
hwloc_pci_discovery_prepare(struct hwloc_topology * topology)125 hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
126 {
127 char *env;
128
129 env = getenv("HWLOC_PCI_LOCALITY");
130 if (env) {
131 int fd;
132
133 topology->pci_has_forced_locality = 1;
134
135 fd = open(env, O_RDONLY);
136 if (fd >= 0) {
137 struct stat st;
138 char *buffer;
139 int err = fstat(fd, &st);
140 if (!err) {
141 if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */
142 buffer = malloc(st.st_size+1);
143 if (buffer && read(fd, buffer, st.st_size) == st.st_size) {
144 buffer[st.st_size] = '\0';
145 hwloc_pci_forced_locality_parse(topology, buffer);
146 }
147 free(buffer);
148 } else {
149 fprintf(stderr, "Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n",
150 env, (unsigned long) st.st_size);
151 }
152 }
153 close(fd);
154 } else
155 hwloc_pci_forced_locality_parse(topology, env);
156 }
157 }
158
159 void
hwloc_pci_discovery_exit(struct hwloc_topology * topology)160 hwloc_pci_discovery_exit(struct hwloc_topology *topology)
161 {
162 struct hwloc_pci_locality_s *cur;
163 unsigned i;
164
165 for(i=0; i<topology->pci_forced_locality_nr; i++)
166 hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset);
167 free(topology->pci_forced_locality);
168
169 cur = topology->first_pci_locality;
170 while (cur) {
171 struct hwloc_pci_locality_s *next = cur->next;
172 hwloc_bitmap_free(cur->cpuset);
173 free(cur);
174 cur = next;
175 }
176
177 hwloc_pci_discovery_init(topology);
178 }
179
180
181 /******************************
182 * Inserting in Tree by Bus ID
183 */
184
185 #ifdef HWLOC_DEBUG
186 static void
hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,struct hwloc_obj * pcidev)187 hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
188 struct hwloc_obj *pcidev)
189 {
190 char busid[14];
191 hwloc_obj_t parent;
192
193 /* indent */
194 parent = pcidev->parent;
195 while (parent) {
196 hwloc_debug("%s", " ");
197 parent = parent->parent;
198 }
199
200 snprintf(busid, sizeof(busid), "%04x:%02x:%02x.%01x",
201 pcidev->attr->pcidev.domain, pcidev->attr->pcidev.bus, pcidev->attr->pcidev.dev, pcidev->attr->pcidev.func);
202
203 if (pcidev->type == HWLOC_OBJ_BRIDGE) {
204 if (pcidev->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST)
205 hwloc_debug("HostBridge");
206 else
207 hwloc_debug("%s Bridge [%04x:%04x]", busid,
208 pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id);
209 hwloc_debug(" to %04x:[%02x:%02x]\n",
210 pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus);
211 } else
212 hwloc_debug("%s Device [%04x:%04x (%04x:%04x) rev=%02x class=%04x]\n", busid,
213 pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id,
214 pcidev->attr->pcidev.subvendor_id, pcidev->attr->pcidev.subdevice_id,
215 pcidev->attr->pcidev.revision, pcidev->attr->pcidev.class_id);
216 }
217
218 static void
hwloc_pci_traverse(void * cbdata,struct hwloc_obj * tree,void (* cb)(void * cbdata,struct hwloc_obj *))219 hwloc_pci_traverse(void * cbdata, struct hwloc_obj *tree,
220 void (*cb)(void * cbdata, struct hwloc_obj *))
221 {
222 hwloc_obj_t child;
223 cb(cbdata, tree);
224 for_each_io_child(child, tree) {
225 if (child->type == HWLOC_OBJ_BRIDGE)
226 hwloc_pci_traverse(cbdata, child, cb);
227 }
228 }
229 #endif /* HWLOC_DEBUG */
230
/* Result of comparing the bus id of a first object against a second one. */
enum hwloc_pci_busid_comparison_e {
  HWLOC_PCI_BUSID_LOWER = 0,    /* first sorts before second */
  HWLOC_PCI_BUSID_HIGHER = 1,   /* first sorts after second */
  HWLOC_PCI_BUSID_INCLUDED = 2, /* first is on a bus below second, which is a bridge */
  HWLOC_PCI_BUSID_SUPERSET = 3, /* first is a bridge and second is on a bus below it */
  HWLOC_PCI_BUSID_EQUAL = 4     /* same domain, bus, device and function */
};
238
239 static enum hwloc_pci_busid_comparison_e
hwloc_pci_compare_busids(struct hwloc_obj * a,struct hwloc_obj * b)240 hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b)
241 {
242 #ifdef HWLOC_DEBUG
243 if (a->type == HWLOC_OBJ_BRIDGE)
244 assert(a->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
245 if (b->type == HWLOC_OBJ_BRIDGE)
246 assert(b->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
247 #endif
248
249 if (a->attr->pcidev.domain < b->attr->pcidev.domain)
250 return HWLOC_PCI_BUSID_LOWER;
251 if (a->attr->pcidev.domain > b->attr->pcidev.domain)
252 return HWLOC_PCI_BUSID_HIGHER;
253
254 if (a->type == HWLOC_OBJ_BRIDGE
255 && b->attr->pcidev.bus >= a->attr->bridge.downstream.pci.secondary_bus
256 && b->attr->pcidev.bus <= a->attr->bridge.downstream.pci.subordinate_bus)
257 return HWLOC_PCI_BUSID_SUPERSET;
258 if (b->type == HWLOC_OBJ_BRIDGE
259 && a->attr->pcidev.bus >= b->attr->bridge.downstream.pci.secondary_bus
260 && a->attr->pcidev.bus <= b->attr->bridge.downstream.pci.subordinate_bus)
261 return HWLOC_PCI_BUSID_INCLUDED;
262
263 if (a->attr->pcidev.bus < b->attr->pcidev.bus)
264 return HWLOC_PCI_BUSID_LOWER;
265 if (a->attr->pcidev.bus > b->attr->pcidev.bus)
266 return HWLOC_PCI_BUSID_HIGHER;
267
268 if (a->attr->pcidev.dev < b->attr->pcidev.dev)
269 return HWLOC_PCI_BUSID_LOWER;
270 if (a->attr->pcidev.dev > b->attr->pcidev.dev)
271 return HWLOC_PCI_BUSID_HIGHER;
272
273 if (a->attr->pcidev.func < b->attr->pcidev.func)
274 return HWLOC_PCI_BUSID_LOWER;
275 if (a->attr->pcidev.func > b->attr->pcidev.func)
276 return HWLOC_PCI_BUSID_HIGHER;
277
278 /* Should never reach here. */
279 return HWLOC_PCI_BUSID_EQUAL;
280 }
281
282 static void
hwloc_pci_add_object(struct hwloc_obj * parent,struct hwloc_obj ** parent_io_first_child_p,struct hwloc_obj * new)283 hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_first_child_p, struct hwloc_obj *new)
284 {
285 struct hwloc_obj **curp, **childp;
286
287 curp = parent_io_first_child_p;
288 while (*curp) {
289 enum hwloc_pci_busid_comparison_e comp = hwloc_pci_compare_busids(new, *curp);
290 switch (comp) {
291 case HWLOC_PCI_BUSID_HIGHER:
292 /* go further */
293 curp = &(*curp)->next_sibling;
294 continue;
295 case HWLOC_PCI_BUSID_INCLUDED:
296 /* insert new below current bridge */
297 hwloc_pci_add_object(*curp, &(*curp)->io_first_child, new);
298 return;
299 case HWLOC_PCI_BUSID_LOWER:
300 case HWLOC_PCI_BUSID_SUPERSET: {
301 /* insert new before current */
302 new->next_sibling = *curp;
303 *curp = new;
304 new->parent = parent;
305 if (new->type == HWLOC_OBJ_BRIDGE) {
306 /* look at remaining siblings and move some below new */
307 childp = &new->io_first_child;
308 curp = &new->next_sibling;
309 while (*curp) {
310 hwloc_obj_t cur = *curp;
311 if (hwloc_pci_compare_busids(new, cur) == HWLOC_PCI_BUSID_LOWER) {
312 /* this sibling remains under root, after new. */
313 if (cur->attr->pcidev.domain > new->attr->pcidev.domain
314 || cur->attr->pcidev.bus > new->attr->bridge.downstream.pci.subordinate_bus)
315 /* this sibling is even above new's subordinate bus, no other sibling could go below new */
316 return;
317 curp = &cur->next_sibling;
318 } else {
319 /* this sibling goes under new */
320 *childp = cur;
321 *curp = cur->next_sibling;
322 (*childp)->parent = new;
323 (*childp)->next_sibling = NULL;
324 childp = &(*childp)->next_sibling;
325 }
326 }
327 }
328 return;
329 }
330 case HWLOC_PCI_BUSID_EQUAL: {
331 static int reported = 0;
332 if (!reported && !hwloc_hide_errors()) {
333 fprintf(stderr, "*********************************************************\n");
334 fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION);
335 fprintf(stderr, "*\n");
336 fprintf(stderr, "* Trying to insert PCI object %04x:%02x:%02x.%01x at %04x:%02x:%02x.%01x\n",
337 new->attr->pcidev.domain, new->attr->pcidev.bus, new->attr->pcidev.dev, new->attr->pcidev.func,
338 (*curp)->attr->pcidev.domain, (*curp)->attr->pcidev.bus, (*curp)->attr->pcidev.dev, (*curp)->attr->pcidev.func);
339 fprintf(stderr, "*\n");
340 fprintf(stderr, "* hwloc will now ignore this object and continue.\n");
341 fprintf(stderr, "*********************************************************\n");
342 reported = 1;
343 }
344 hwloc_free_unlinked_object(new);
345 return;
346 }
347 }
348 }
349 /* add to the end of the list if higher than everybody */
350 new->parent = parent;
351 new->next_sibling = NULL;
352 *curp = new;
353 }
354
/* Insert obj in the top-level PCI tree *treep, keeping it sorted by bus id. */
void
hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep,
                                   struct hwloc_obj *obj)
{
  /* objects at the top of the tree have no parent yet */
  struct hwloc_obj *no_parent = NULL;

  hwloc_pci_add_object(no_parent, treep, obj);
}
361
362
363 /**********************
364 * Attaching PCI Trees
365 */
366
367 static struct hwloc_obj *
hwloc_pcidisc_add_hostbridges(struct hwloc_topology * topology,struct hwloc_obj * old_tree)368 hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology,
369 struct hwloc_obj *old_tree)
370 {
371 struct hwloc_obj * new = NULL, **newp = &new;
372
373 /*
374 * tree points to all objects connected to any upstream bus in the machine.
375 * We now create one real hostbridge object per upstream bus.
376 * It's not actually a PCI device so we have to create it.
377 */
378 while (old_tree) {
379 /* start a new host bridge */
380 struct hwloc_obj *hostbridge;
381 struct hwloc_obj **dstnextp;
382 struct hwloc_obj **srcnextp;
383 struct hwloc_obj *child;
384 unsigned current_domain;
385 unsigned char current_bus;
386 unsigned char current_subordinate;
387
388 hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX);
389 if (!hostbridge) {
390 /* just queue remaining things without hostbridges and return */
391 *newp = old_tree;
392 return new;
393 }
394 dstnextp = &hostbridge->io_first_child;
395
396 srcnextp = &old_tree;
397 child = *srcnextp;
398 current_domain = child->attr->pcidev.domain;
399 current_bus = child->attr->pcidev.bus;
400 current_subordinate = current_bus;
401
402 hwloc_debug("Adding new PCI hostbridge %04x:%02x\n", current_domain, current_bus);
403
404 next_child:
405 /* remove next child from tree */
406 *srcnextp = child->next_sibling;
407 /* append it to hostbridge */
408 *dstnextp = child;
409 child->parent = hostbridge;
410 child->next_sibling = NULL;
411 dstnextp = &child->next_sibling;
412
413 /* compute hostbridge secondary/subordinate buses */
414 if (child->type == HWLOC_OBJ_BRIDGE
415 && child->attr->bridge.downstream.pci.subordinate_bus > current_subordinate)
416 current_subordinate = child->attr->bridge.downstream.pci.subordinate_bus;
417
418 /* use next child if it has the same domains/bus */
419 child = *srcnextp;
420 if (child
421 && child->attr->pcidev.domain == current_domain
422 && child->attr->pcidev.bus == current_bus)
423 goto next_child;
424
425 /* finish setting up this hostbridge */
426 hostbridge->attr->bridge.upstream_type = HWLOC_OBJ_BRIDGE_HOST;
427 hostbridge->attr->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI;
428 hostbridge->attr->bridge.downstream.pci.domain = current_domain;
429 hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus;
430 hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate;
431 hwloc_debug(" new PCI hostbridge covers %04x:[%02x-%02x]\n",
432 current_domain, current_bus, current_subordinate);
433
434 *newp = hostbridge;
435 newp = &hostbridge->next_sibling;
436 }
437
438 return new;
439 }
440
441 static struct hwloc_obj *
hwloc_pci_fixup_busid_parent(struct hwloc_topology * topology __hwloc_attribute_unused,struct hwloc_pcidev_attr_s * busid __hwloc_attribute_unused,struct hwloc_obj * parent __hwloc_attribute_unused)442 hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused,
443 struct hwloc_pcidev_attr_s *busid __hwloc_attribute_unused,
444 struct hwloc_obj *parent __hwloc_attribute_unused)
445 {
446 /* no quirk for now */
447 return parent;
448 }
449
450 static struct hwloc_obj *
hwloc__pci_find_busid_parent(struct hwloc_topology * topology,struct hwloc_pcidev_attr_s * busid)451 hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcidev_attr_s *busid)
452 {
453 hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
454 hwloc_obj_t parent;
455 int forced = 0;
456 int noquirks = 0;
457 unsigned i;
458 int err;
459
460 hwloc_debug("Looking for parent of PCI busid %04x:%02x:%02x.%01x\n",
461 busid->domain, busid->bus, busid->dev, busid->func);
462
463 /* try to match a forced locality */
464 if (topology->pci_has_forced_locality) {
465 for(i=0; i<topology->pci_forced_locality_nr; i++) {
466 if (busid->domain == topology->pci_forced_locality[i].domain
467 && busid->bus >= topology->pci_forced_locality[i].bus_first
468 && busid->bus <= topology->pci_forced_locality[i].bus_last) {
469 hwloc_bitmap_copy(cpuset, topology->pci_forced_locality[i].cpuset);
470 forced = 1;
471 break;
472 }
473 }
474 /* if pci locality was forced, even empty, don't let quirks change what the OS reports */
475 noquirks = 1;
476 }
477
478 /* deprecated force locality variables */
479 if (!forced) {
480 const char *env;
481 char envname[256];
482 /* override the cpuset with the environment if given */
483 snprintf(envname, sizeof(envname), "HWLOC_PCI_%04x_%02x_LOCALCPUS",
484 busid->domain, busid->bus);
485 env = getenv(envname);
486 if (env) {
487 static int reported = 0;
488 if (!topology->pci_has_forced_locality && !reported) {
489 fprintf(stderr, "Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env);
490 reported = 1;
491 }
492 if (*env) {
493 /* force the cpuset */
494 hwloc_debug("Overriding PCI locality using %s in the environment\n", envname);
495 hwloc_bitmap_sscanf(cpuset, env);
496 forced = 1;
497 }
498 /* if env exists, even empty, don't let quirks change what the OS reports */
499 noquirks = 1;
500 }
501 }
502
503 if (!forced) {
504 /* get the cpuset by asking the backend that provides the relevant hook, if any. */
505 struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend;
506 if (backend)
507 err = backend->get_pci_busid_cpuset(backend, busid, cpuset);
508 else
509 err = -1;
510 if (err < 0)
511 /* if we got nothing, assume this PCI bus is attached to the top of hierarchy */
512 hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology));
513 }
514
515 hwloc_debug_bitmap(" will attach PCI bus to cpuset %s\n", cpuset);
516
517 parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
518 if (parent) {
519 if (!noquirks)
520 /* We found a valid parent. Check that the OS didn't report invalid locality */
521 parent = hwloc_pci_fixup_busid_parent(topology, busid, parent);
522 } else {
523 /* Fallback to root */
524 parent = hwloc_get_root_obj(topology);
525 }
526
527 hwloc_bitmap_free(cpuset);
528 return parent;
529 }
530
531 int
hwloc_pcidisc_tree_attach(struct hwloc_topology * topology,struct hwloc_obj * tree)532 hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree)
533 {
534 enum hwloc_type_filter_e bfilter;
535
536 if (!tree)
537 /* found nothing, exit */
538 return 0;
539
540 #ifdef HWLOC_DEBUG
541 hwloc_debug("%s", "\nPCI hierarchy:\n");
542 hwloc_pci_traverse(NULL, tree, hwloc_pci_traverse_print_cb);
543 hwloc_debug("%s", "\n");
544 #endif
545
546 bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE];
547 if (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE) {
548 tree = hwloc_pcidisc_add_hostbridges(topology, tree);
549 }
550
551 while (tree) {
552 struct hwloc_obj *obj, *pciobj;
553 struct hwloc_obj *parent;
554 struct hwloc_pci_locality_s *loc;
555 unsigned domain, bus_min, bus_max;
556
557 obj = tree;
558
559 /* hostbridges don't have a PCI busid for looking up locality, use their first child */
560 if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST)
561 pciobj = obj->io_first_child;
562 else
563 pciobj = obj;
564 /* now we have a pci device or a pci bridge */
565 assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE
566 || (pciobj->type == HWLOC_OBJ_BRIDGE && pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI));
567
568 if (obj->type == HWLOC_OBJ_BRIDGE) {
569 domain = obj->attr->bridge.downstream.pci.domain;
570 bus_min = obj->attr->bridge.downstream.pci.secondary_bus;
571 bus_max = obj->attr->bridge.downstream.pci.subordinate_bus;
572 } else {
573 domain = pciobj->attr->pcidev.domain;
574 bus_min = pciobj->attr->pcidev.bus;
575 bus_max = pciobj->attr->pcidev.bus;
576 }
577
578 /* find where to attach that PCI bus */
579 parent = hwloc__pci_find_busid_parent(topology, &pciobj->attr->pcidev);
580
581 /* reuse the previous locality if possible */
582 if (topology->last_pci_locality
583 && parent == topology->last_pci_locality->parent
584 && domain == topology->last_pci_locality->domain
585 && (bus_min == topology->last_pci_locality->bus_max
586 || bus_min == topology->last_pci_locality->bus_max+1)) {
587 hwloc_debug(" Reusing PCI locality up to bus %04x:%02x\n",
588 domain, bus_max);
589 topology->last_pci_locality->bus_max = bus_max;
590 goto done;
591 }
592
593 loc = malloc(sizeof(*loc));
594 if (!loc) {
595 /* fallback to attaching to root */
596 parent = hwloc_get_root_obj(topology);
597 goto done;
598 }
599
600 loc->domain = domain;
601 loc->bus_min = bus_min;
602 loc->bus_max = bus_max;
603 loc->parent = parent;
604 loc->cpuset = hwloc_bitmap_dup(parent->cpuset);
605 if (!loc->cpuset) {
606 /* fallback to attaching to root */
607 free(loc);
608 parent = hwloc_get_root_obj(topology);
609 goto done;
610 }
611
612 hwloc_debug("Adding PCI locality %s P#%u for bus %04x:[%02x:%02x]\n",
613 hwloc_obj_type_string(parent->type), parent->os_index, loc->domain, loc->bus_min, loc->bus_max);
614 if (topology->last_pci_locality) {
615 loc->prev = topology->last_pci_locality;
616 loc->next = NULL;
617 topology->last_pci_locality->next = loc;
618 topology->last_pci_locality = loc;
619 } else {
620 loc->prev = NULL;
621 loc->next = NULL;
622 topology->first_pci_locality = loc;
623 topology->last_pci_locality = loc;
624 }
625
626 done:
627 /* dequeue this object */
628 tree = obj->next_sibling;
629 obj->next_sibling = NULL;
630 hwloc_insert_object_by_parent(topology, parent, obj);
631 }
632
633 return 0;
634 }
635
636
637 /*********************************
638 * Finding PCI objects or parents
639 */
640
641 struct hwloc_obj *
hwloc_pci_find_parent_by_busid(struct hwloc_topology * topology,unsigned domain,unsigned bus,unsigned dev,unsigned func)642 hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology,
643 unsigned domain, unsigned bus, unsigned dev, unsigned func)
644 {
645 struct hwloc_pcidev_attr_s busid;
646 hwloc_obj_t parent;
647
648 /* try to find that exact busid */
649 parent = hwloc_pci_find_by_busid(topology, domain, bus, dev, func);
650 if (parent)
651 return parent;
652
653 /* try to find the locality of that bus instead */
654 busid.domain = domain;
655 busid.bus = bus;
656 busid.dev = dev;
657 busid.func = func;
658 return hwloc__pci_find_busid_parent(topology, &busid);
659 }
660
661 /* return the smallest object that contains the desired busid */
662 static struct hwloc_obj *
hwloc__pci_find_by_busid(hwloc_obj_t parent,unsigned domain,unsigned bus,unsigned dev,unsigned func)663 hwloc__pci_find_by_busid(hwloc_obj_t parent,
664 unsigned domain, unsigned bus, unsigned dev, unsigned func)
665 {
666 hwloc_obj_t child;
667
668 for_each_io_child(child, parent) {
669 if (child->type == HWLOC_OBJ_PCI_DEVICE
670 || (child->type == HWLOC_OBJ_BRIDGE
671 && child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
672 if (child->attr->pcidev.domain == domain
673 && child->attr->pcidev.bus == bus
674 && child->attr->pcidev.dev == dev
675 && child->attr->pcidev.func == func)
676 /* that's the right bus id */
677 return child;
678 if (child->attr->pcidev.domain > domain
679 || (child->attr->pcidev.domain == domain
680 && child->attr->pcidev.bus > bus))
681 /* bus id too high, won't find anything later, return parent */
682 return parent;
683 if (child->type == HWLOC_OBJ_BRIDGE
684 && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
685 && child->attr->bridge.downstream.pci.domain == domain
686 && child->attr->bridge.downstream.pci.secondary_bus <= bus
687 && child->attr->bridge.downstream.pci.subordinate_bus >= bus)
688 /* not the right bus id, but it's included in the bus below that bridge */
689 return hwloc__pci_find_by_busid(child, domain, bus, dev, func);
690
691 } else if (child->type == HWLOC_OBJ_BRIDGE
692 && child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI
693 && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
694 /* non-PCI to PCI bridge, just look at the subordinate bus */
695 && child->attr->bridge.downstream.pci.domain == domain
696 && child->attr->bridge.downstream.pci.secondary_bus <= bus
697 && child->attr->bridge.downstream.pci.subordinate_bus >= bus) {
698 /* contains our bus, recurse */
699 return hwloc__pci_find_by_busid(child, domain, bus, dev, func);
700 }
701 }
702 /* didn't find anything, return parent */
703 return parent;
704 }
705
706 struct hwloc_obj *
hwloc_pci_find_by_busid(struct hwloc_topology * topology,unsigned domain,unsigned bus,unsigned dev,unsigned func)707 hwloc_pci_find_by_busid(struct hwloc_topology *topology,
708 unsigned domain, unsigned bus, unsigned dev, unsigned func)
709 {
710 struct hwloc_pci_locality_s *loc;
711 hwloc_obj_t root = hwloc_get_root_obj(topology);
712 hwloc_obj_t parent = NULL;
713
714 hwloc_debug("pcidisc looking for bus id %04x:%02x:%02x.%01x\n", domain, bus, dev, func);
715 loc = topology->first_pci_locality;
716 while (loc) {
717 if (loc->domain == domain && loc->bus_min <= bus && loc->bus_max >= bus) {
718 parent = loc->parent;
719 assert(parent);
720 hwloc_debug(" found pci locality for %04x:[%02x:%02x]\n",
721 loc->domain, loc->bus_min, loc->bus_max);
722 break;
723 }
724 loc = loc->next;
725 }
726 /* if we failed to insert localities, look at root too */
727 if (!parent)
728 parent = root;
729
730 hwloc_debug(" looking for bus %04x:%02x:%02x.%01x below %s P#%u\n",
731 domain, bus, dev, func,
732 hwloc_obj_type_string(parent->type), parent->os_index);
733 parent = hwloc__pci_find_by_busid(parent, domain, bus, dev, func);
734 if (parent == root) {
735 hwloc_debug(" found nothing better than root object, ignoring\n");
736 return NULL;
737 } else {
738 if (parent->type == HWLOC_OBJ_PCI_DEVICE
739 || (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))
740 hwloc_debug(" found busid %04x:%02x:%02x.%01x\n",
741 parent->attr->pcidev.domain, parent->attr->pcidev.bus,
742 parent->attr->pcidev.dev, parent->attr->pcidev.func);
743 else
744 hwloc_debug(" found parent %s P#%u\n",
745 hwloc_obj_type_string(parent->type), parent->os_index);
746 return parent;
747 }
748 }
749
750
751 /*******************************
752 * Parsing the PCI Config Space
753 */
754
#define HWLOC_PCI_STATUS 0x06
#define HWLOC_PCI_STATUS_CAP_LIST 0x10
#define HWLOC_PCI_CAPABILITY_LIST 0x34
#define HWLOC_PCI_CAP_LIST_ID 0
#define HWLOC_PCI_CAP_LIST_NEXT 1

/*
 * Walk the capability list of a PCI config space and return the offset of
 * the capability whose id is cap, or 0 if it is not found.
 *
 * The walk stops when the next pointer is 0, when a capability id is 0xff,
 * or when an offset is visited twice (looping list).
 */
unsigned
hwloc_pcidisc_find_cap(const unsigned char *config, unsigned cap)
{
  unsigned char visited[256] = { 0 };
  unsigned char pos; /* unsigned char to make sure we stay within the 256-byte config space */

  /* no capability list at all */
  if ((config[HWLOC_PCI_STATUS] & HWLOC_PCI_STATUS_CAP_LIST) == 0)
    return 0;

  /* the two low bits of capability pointers are masked out */
  pos = config[HWLOC_PCI_CAPABILITY_LIST] & ~3;
  while (pos != 0 && !visited[pos]) {
    unsigned char id = config[pos + HWLOC_PCI_CAP_LIST_ID];

    visited[pos] = 1;

    if (id == cap)
      return pos;
    if (id == 0xff)
      break;

    pos = config[pos + HWLOC_PCI_CAP_LIST_NEXT] & ~3;
  }

  return 0;
}
788
#define HWLOC_PCI_EXP_LNKSTA 0x12
#define HWLOC_PCI_EXP_LNKSTA_SPEED 0x000f
#define HWLOC_PCI_EXP_LNKSTA_WIDTH 0x03f0

/*
 * Compute the PCIe link speed in GB/s from the Link Status register found
 * at offset + HWLOC_PCI_EXP_LNKSTA in the config space, where offset is
 * presumably the offset of the PCI Express capability (confirm against callers).
 *
 * The result is stored in *linkspeed. Always returns 0.
 */
int
hwloc_pcidisc_find_linkspeed(const unsigned char *config,
                             unsigned offset, float *linkspeed)
{
  const unsigned char *reg = &config[offset + HWLOC_PCI_EXP_LNKSTA];
  unsigned linksta, speed, width;
  float lanespeed;

  /* The register is 16 bits wide and stored in little-endian order in the config space.
   * Assemble it from its 2 bytes so that the result doesn't depend on the host
   * endianness and no byte is read past the register.
   */
  linksta = (unsigned) reg[0] | ((unsigned) reg[1] << 8);
  speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */
  width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */
  /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane
   * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane
   * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane
   * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane
   * PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane
   */

  /* lanespeed in Gbit/s */
  if (speed <= 2)
    lanespeed = 2.5f * speed * 0.8f;
  else
    lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */

  /* linkspeed in GB/s */
  *linkspeed = lanespeed * width / 8;
  return 0;
}
820
821 #define HWLOC_PCI_HEADER_TYPE 0x0e
822 #define HWLOC_PCI_HEADER_TYPE_BRIDGE 1
823 #define HWLOC_PCI_CLASS_BRIDGE_PCI 0x0604
824
825 hwloc_obj_type_t
hwloc_pcidisc_check_bridge_type(unsigned device_class,const unsigned char * config)826 hwloc_pcidisc_check_bridge_type(unsigned device_class, const unsigned char *config)
827 {
828 unsigned char headertype;
829
830 if (device_class != HWLOC_PCI_CLASS_BRIDGE_PCI)
831 return HWLOC_OBJ_PCI_DEVICE;
832
833 headertype = config[HWLOC_PCI_HEADER_TYPE] & 0x7f;
834 return (headertype == HWLOC_PCI_HEADER_TYPE_BRIDGE)
835 ? HWLOC_OBJ_BRIDGE : HWLOC_OBJ_PCI_DEVICE;
836 }
837
#define HWLOC_PCI_PRIMARY_BUS 0x18
#define HWLOC_PCI_SECONDARY_BUS 0x19
#define HWLOC_PCI_SUBORDINATE_BUS 0x1a

/*
 * Read the secondary and subordinate bus numbers of a bridge from its config space.
 *
 * Returns 0 and fills *secondary_busp and *subordinate_busp if both buses are
 * above the bridge's own bus and secondary <= subordinate.
 * Returns -1 without touching the outputs otherwise.
 */
int
hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func,
                                unsigned *secondary_busp, unsigned *subordinate_busp,
                                const unsigned char *config)
{
  const unsigned secondary = config[HWLOC_PCI_SECONDARY_BUS];
  const unsigned subordinate = config[HWLOC_PCI_SUBORDINATE_BUS];

  if (config[HWLOC_PCI_PRIMARY_BUS] != bus) {
    /* Sometimes the config space contains 00 instead of the actual primary bus number.
     * Always trust the bus ID because it was built by the system which has more information
     * to workaround such problems (e.g. ACPI information about PCI parent/children).
     */
    hwloc_debug(" %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n",
                domain, bus, dev, func, config[HWLOC_PCI_PRIMARY_BUS]);
  }

  if (secondary > bus
      && subordinate > bus
      && secondary <= subordinate) {
    *secondary_busp = secondary;
    *subordinate_busp = subordinate;
    return 0;
  }

  /* This should catch most cases of invalid bridge information
   * (e.g. 00 for secondary and subordinate).
   * Ideally we would also check that [secondary-subordinate] is included
   * in the parent bridge [secondary+1:subordinate]. But that's hard to do
   * because objects may be discovered out of order (especially in the fsroot case).
   */
  hwloc_debug(" %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n",
              domain, bus, dev, func,
              secondary, subordinate);
  return -1;
}
880
881
882 /****************
883 * Class Strings
884 */
885
/*
 * Return a short static string describing a PCI class id.
 *
 * The high byte selects the base class and the full 16-bit value selects
 * the subclass. Unknown subclasses fall back to the base class name,
 * unknown base classes to "Other". The returned string must not be freed.
 */
const char *
hwloc_pci_class_string(unsigned short class_id)
{
  /* See https://pci-ids.ucw.cz/read/PD/ */
  switch ((class_id & 0xff00) >> 8) {
    case 0x00:
      switch (class_id) {
        case 0x0001: return "VGA";
      }
      break;
    case 0x01:
      switch (class_id) {
        case 0x0100: return "SCSI";
        case 0x0101: return "IDE";
        case 0x0102: return "Floppy";
        case 0x0103: return "IPI";
        case 0x0104: return "RAID";
        case 0x0105: return "ATA";
        case 0x0106: return "SATA";
        case 0x0107: return "SAS";
        case 0x0108: return "NVMExp";
      }
      return "Storage";
    case 0x02:
      switch (class_id) {
        case 0x0200: return "Ethernet";
        case 0x0201: return "TokenRing";
        case 0x0202: return "FDDI";
        case 0x0203: return "ATM";
        case 0x0204: return "ISDN";
        case 0x0205: return "WorldFip";
        case 0x0206: return "PICMG";
        case 0x0207: return "InfiniBand";
        case 0x0208: return "Fabric";
      }
      return "Network";
    case 0x03:
      switch (class_id) {
        case 0x0300: return "VGA";
        case 0x0301: return "XGA";
        case 0x0302: return "3D";
      }
      return "Display";
    case 0x04:
      switch (class_id) {
        case 0x0400: return "MultimediaVideo";
        case 0x0401: return "MultimediaAudio";
        case 0x0402: return "Telephony";
        case 0x0403: return "AudioDevice";
      }
      return "Multimedia";
    case 0x05:
      switch (class_id) {
        case 0x0500: return "RAM";
        case 0x0501: return "Flash";
      }
      return "Memory";
    case 0x06:
      switch (class_id) {
        case 0x0600: return "HostBridge";
        case 0x0601: return "ISABridge";
        case 0x0602: return "EISABridge";
        case 0x0603: return "MicroChannelBridge";
        case 0x0604: return "PCIBridge";
        case 0x0605: return "PCMCIABridge";
        case 0x0606: return "NubusBridge";
        case 0x0607: return "CardBusBridge";
        case 0x0608: return "RACEwayBridge";
        case 0x0609: return "SemiTransparentPCIBridge";
        case 0x060a: return "InfiniBandPCIHostBridge";
      }
      return "Bridge";
    case 0x07:
      switch (class_id) {
        case 0x0700: return "Serial";
        case 0x0701: return "Parallel";
        case 0x0702: return "MultiportSerial";
        case 0x0703: return "Modem";
        case 0x0704: return "GPIB";
        case 0x0705: return "SmartCard";
      }
      return "Communication";
    case 0x08:
      switch (class_id) {
        case 0x0800: return "PIC";
        case 0x0801: return "DMA";
        case 0x0802: return "Timer";
        case 0x0803: return "RTC";
        case 0x0804: return "PCIHotPlug";
        case 0x0805: return "SDHost";
        case 0x0806: return "IOMMU";
      }
      return "SystemPeripheral";
    case 0x09:
      switch (class_id) {
        case 0x0900: return "Keyboard";
        case 0x0901: return "DigitizerPen";
        case 0x0902: return "Mouse";
        case 0x0903: return "Scanner";
        case 0x0904: return "Gameport";
      }
      return "Input";
    case 0x0a:
      return "DockingStation";
    case 0x0b:
      switch (class_id) {
        case 0x0b00: return "386";
        case 0x0b01: return "486";
        case 0x0b02: return "Pentium";
        /* 0x0b03 and 0x0b04 might be Pentium and P6 ? */
        case 0x0b10: return "Alpha";
        case 0x0b20: return "PowerPC";
        case 0x0b30: return "MIPS";
        case 0x0b40: return "Co-Processor";
      }
      return "Processor";
    case 0x0c:
      switch (class_id) {
        case 0x0c00: return "FireWire";
        case 0x0c01: return "ACCESS";
        case 0x0c02: return "SSA";
        case 0x0c03: return "USB";
        case 0x0c04: return "FibreChannel";
        case 0x0c05: return "SMBus";
        case 0x0c06: return "InfiniBand";
        case 0x0c07: return "IPMI-SMIC";
        case 0x0c08: return "SERCOS";
        case 0x0c09: return "CANBUS";
      }
      return "SerialBus";
    case 0x0d:
      switch (class_id) {
        case 0x0d00: return "IRDA";
        case 0x0d01: return "ConsumerIR";
        case 0x0d10: return "RF";
        case 0x0d11: return "Bluetooth";
        case 0x0d12: return "Broadband";
        case 0x0d20: return "802.1a";
        case 0x0d21: return "802.1b";
      }
      return "Wireless";
    case 0x0e:
      switch (class_id) {
        case 0x0e00: return "I2O";
      }
      return "Intelligent";
    case 0x0f:
      return "Satellite";
    case 0x10:
      return "Encryption";
    case 0x11:
      return "SignalProcessing";
    case 0x12:
      return "ProcessingAccelerator";
    case 0x13:
      return "Instrumentation";
    case 0x40:
      return "Co-Processor";
  }
  return "Other";
}
1047