1 /*
2  * Copyright © 2009-2020 Inria.  All rights reserved.
3  * See COPYING in top-level directory.
4  */
5 
6 #include "private/autogen/config.h"
7 #include "hwloc.h"
8 #include "hwloc/plugins.h"
9 #include "private/private.h"
10 #include "private/debug.h"
11 #include "private/misc.h"
12 
13 #include <fcntl.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17 #include <sys/stat.h>
18 
19 #if defined(HWLOC_WIN_SYS) && !defined(__CYGWIN__)
20 #include <io.h>
21 #define open _open
22 #define read _read
23 #define close _close
24 #endif
25 
26 
27 /**************************************
28  * Init/Exit and Forced PCI localities
29  */
30 
31 static void
hwloc_pci_forced_locality_parse_one(struct hwloc_topology * topology,const char * string,unsigned * allocated)32 hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology,
33 				    const char *string /* must contain a ' ' */,
34 				    unsigned *allocated)
35 {
36   unsigned nr = topology->pci_forced_locality_nr;
37   unsigned domain, bus_first, bus_last, dummy;
38   hwloc_bitmap_t set;
39   char *tmp;
40 
41   if (sscanf(string, "%x:%x-%x %x", &domain, &bus_first, &bus_last, &dummy) == 4) {
42     /* fine */
43   } else if (sscanf(string, "%x:%x %x", &domain, &bus_first, &dummy) == 3) {
44     bus_last = bus_first;
45   } else if (sscanf(string, "%x %x", &domain, &dummy) == 2) {
46     bus_first = 0;
47     bus_last = 255;
48   } else
49     return;
50 
51   tmp = strchr(string, ' ');
52   if (!tmp)
53     return;
54   tmp++;
55 
56   set = hwloc_bitmap_alloc();
57   hwloc_bitmap_sscanf(set, tmp);
58 
59   if (!*allocated) {
60     topology->pci_forced_locality = malloc(sizeof(*topology->pci_forced_locality));
61     if (!topology->pci_forced_locality)
62       goto out_with_set; /* failed to allocate, ignore this forced locality */
63     *allocated = 1;
64   } else if (nr >= *allocated) {
65     struct hwloc_pci_forced_locality_s *tmplocs;
66     tmplocs = realloc(topology->pci_forced_locality,
67 		      2 * *allocated * sizeof(*topology->pci_forced_locality));
68     if (!tmplocs)
69       goto out_with_set; /* failed to allocate, ignore this forced locality */
70     topology->pci_forced_locality = tmplocs;
71     *allocated *= 2;
72   }
73 
74   topology->pci_forced_locality[nr].domain = domain;
75   topology->pci_forced_locality[nr].bus_first = bus_first;
76   topology->pci_forced_locality[nr].bus_last = bus_last;
77   topology->pci_forced_locality[nr].cpuset = set;
78   topology->pci_forced_locality_nr++;
79   return;
80 
81  out_with_set:
82   hwloc_bitmap_free(set);
83   return;
84 }
85 
/*
 * Split _env into entries separated by ';', '\r' or '\n' and hand each one
 * to hwloc_pci_forced_locality_parse_one().
 *
 * The input is duplicated first because separators are overwritten with
 * '\0' while splitting. If the duplication fails, nothing is parsed.
 */
static void
hwloc_pci_forced_locality_parse(struct hwloc_topology *topology, const char *_env)
{
  char *env = strdup(_env);
  unsigned allocated = 0;
  char *tmp = env;

  if (!env)
    /* out of memory, ignore all forced localities */
    return;

  while (1) {
    size_t len = strcspn(tmp, ";\r\n");
    char *next = NULL;

    if (tmp[len] != '\0') {
      /* terminate the current entry and remember where the next one starts, if any */
      tmp[len] = '\0';
      if (tmp[len+1] != '\0')
        next = &tmp[len]+1;
    }

    hwloc_pci_forced_locality_parse_one(topology, tmp, &allocated);

    if (next)
      tmp = next;
    else
      break;
  }

  free(env);
}
113 
114 void
hwloc_pci_discovery_init(struct hwloc_topology * topology)115 hwloc_pci_discovery_init(struct hwloc_topology *topology)
116 {
117   topology->pci_has_forced_locality = 0;
118   topology->pci_forced_locality_nr = 0;
119   topology->pci_forced_locality = NULL;
120 
121   topology->first_pci_locality = topology->last_pci_locality = NULL;
122 }
123 
124 void
hwloc_pci_discovery_prepare(struct hwloc_topology * topology)125 hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
126 {
127   char *env;
128 
129   env = getenv("HWLOC_PCI_LOCALITY");
130   if (env) {
131     int fd;
132 
133     topology->pci_has_forced_locality = 1;
134 
135     fd = open(env, O_RDONLY);
136     if (fd >= 0) {
137       struct stat st;
138       char *buffer;
139       int err = fstat(fd, &st);
140       if (!err) {
141 	if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */
142 	  buffer = malloc(st.st_size+1);
143 	  if (buffer && read(fd, buffer, st.st_size) == st.st_size) {
144 	    buffer[st.st_size] = '\0';
145 	    hwloc_pci_forced_locality_parse(topology, buffer);
146 	  }
147 	  free(buffer);
148 	} else {
149 	  fprintf(stderr, "Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n",
150 		  env, (unsigned long) st.st_size);
151 	}
152       }
153       close(fd);
154     } else
155       hwloc_pci_forced_locality_parse(topology, env);
156   }
157 }
158 
159 void
hwloc_pci_discovery_exit(struct hwloc_topology * topology)160 hwloc_pci_discovery_exit(struct hwloc_topology *topology)
161 {
162   struct hwloc_pci_locality_s *cur;
163   unsigned i;
164 
165   for(i=0; i<topology->pci_forced_locality_nr; i++)
166     hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset);
167   free(topology->pci_forced_locality);
168 
169   cur = topology->first_pci_locality;
170   while (cur) {
171     struct hwloc_pci_locality_s *next = cur->next;
172     hwloc_bitmap_free(cur->cpuset);
173     free(cur);
174     cur = next;
175   }
176 
177   hwloc_pci_discovery_init(topology);
178 }
179 
180 
181 /******************************
182  * Inserting in Tree by Bus ID
183  */
184 
185 #ifdef HWLOC_DEBUG
186 static void
hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,struct hwloc_obj * pcidev)187 hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
188 			    struct hwloc_obj *pcidev)
189 {
190   char busid[14];
191   hwloc_obj_t parent;
192 
193   /* indent */
194   parent = pcidev->parent;
195   while (parent) {
196     hwloc_debug("%s", "  ");
197     parent = parent->parent;
198   }
199 
200   snprintf(busid, sizeof(busid), "%04x:%02x:%02x.%01x",
201            pcidev->attr->pcidev.domain, pcidev->attr->pcidev.bus, pcidev->attr->pcidev.dev, pcidev->attr->pcidev.func);
202 
203   if (pcidev->type == HWLOC_OBJ_BRIDGE) {
204     if (pcidev->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST)
205       hwloc_debug("HostBridge");
206     else
207       hwloc_debug("%s Bridge [%04x:%04x]", busid,
208 		  pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id);
209     hwloc_debug(" to %04x:[%02x:%02x]\n",
210 		pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus);
211   } else
212     hwloc_debug("%s Device [%04x:%04x (%04x:%04x) rev=%02x class=%04x]\n", busid,
213 		pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id,
214 		pcidev->attr->pcidev.subvendor_id, pcidev->attr->pcidev.subdevice_id,
215 		pcidev->attr->pcidev.revision, pcidev->attr->pcidev.class_id);
216 }
217 
218 static void
hwloc_pci_traverse(void * cbdata,struct hwloc_obj * tree,void (* cb)(void * cbdata,struct hwloc_obj *))219 hwloc_pci_traverse(void * cbdata, struct hwloc_obj *tree,
220 		   void (*cb)(void * cbdata, struct hwloc_obj *))
221 {
222   hwloc_obj_t child;
223   cb(cbdata, tree);
224   for_each_io_child(child, tree) {
225     if (child->type == HWLOC_OBJ_BRIDGE)
226       hwloc_pci_traverse(cbdata, child, cb);
227   }
228 }
229 #endif /* HWLOC_DEBUG */
230 
/* Result of hwloc_pci_compare_busids(a, b). */
enum hwloc_pci_busid_comparison_e {
  HWLOC_PCI_BUSID_LOWER    = 0, /* a sorts before b */
  HWLOC_PCI_BUSID_HIGHER   = 1, /* a sorts after b */
  HWLOC_PCI_BUSID_INCLUDED = 2, /* a's bus is within the downstream buses of bridge b */
  HWLOC_PCI_BUSID_SUPERSET = 3, /* a is a bridge whose downstream buses contain b's bus */
  HWLOC_PCI_BUSID_EQUAL    = 4  /* same domain, bus, dev and func */
};
238 
239 static enum hwloc_pci_busid_comparison_e
hwloc_pci_compare_busids(struct hwloc_obj * a,struct hwloc_obj * b)240 hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b)
241 {
242 #ifdef HWLOC_DEBUG
243   if (a->type == HWLOC_OBJ_BRIDGE)
244     assert(a->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
245   if (b->type == HWLOC_OBJ_BRIDGE)
246     assert(b->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
247 #endif
248 
249   if (a->attr->pcidev.domain < b->attr->pcidev.domain)
250     return HWLOC_PCI_BUSID_LOWER;
251   if (a->attr->pcidev.domain > b->attr->pcidev.domain)
252     return HWLOC_PCI_BUSID_HIGHER;
253 
254   if (a->type == HWLOC_OBJ_BRIDGE
255       && b->attr->pcidev.bus >= a->attr->bridge.downstream.pci.secondary_bus
256       && b->attr->pcidev.bus <= a->attr->bridge.downstream.pci.subordinate_bus)
257     return HWLOC_PCI_BUSID_SUPERSET;
258   if (b->type == HWLOC_OBJ_BRIDGE
259       && a->attr->pcidev.bus >= b->attr->bridge.downstream.pci.secondary_bus
260       && a->attr->pcidev.bus <= b->attr->bridge.downstream.pci.subordinate_bus)
261     return HWLOC_PCI_BUSID_INCLUDED;
262 
263   if (a->attr->pcidev.bus < b->attr->pcidev.bus)
264     return HWLOC_PCI_BUSID_LOWER;
265   if (a->attr->pcidev.bus > b->attr->pcidev.bus)
266     return HWLOC_PCI_BUSID_HIGHER;
267 
268   if (a->attr->pcidev.dev < b->attr->pcidev.dev)
269     return HWLOC_PCI_BUSID_LOWER;
270   if (a->attr->pcidev.dev > b->attr->pcidev.dev)
271     return HWLOC_PCI_BUSID_HIGHER;
272 
273   if (a->attr->pcidev.func < b->attr->pcidev.func)
274     return HWLOC_PCI_BUSID_LOWER;
275   if (a->attr->pcidev.func > b->attr->pcidev.func)
276     return HWLOC_PCI_BUSID_HIGHER;
277 
278   /* Should never reach here. */
279   return HWLOC_PCI_BUSID_EQUAL;
280 }
281 
282 static void
hwloc_pci_add_object(struct hwloc_obj * parent,struct hwloc_obj ** parent_io_first_child_p,struct hwloc_obj * new)283 hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_first_child_p, struct hwloc_obj *new)
284 {
285   struct hwloc_obj **curp, **childp;
286 
287   curp = parent_io_first_child_p;
288   while (*curp) {
289     enum hwloc_pci_busid_comparison_e comp = hwloc_pci_compare_busids(new, *curp);
290     switch (comp) {
291     case HWLOC_PCI_BUSID_HIGHER:
292       /* go further */
293       curp = &(*curp)->next_sibling;
294       continue;
295     case HWLOC_PCI_BUSID_INCLUDED:
296       /* insert new below current bridge */
297       hwloc_pci_add_object(*curp, &(*curp)->io_first_child, new);
298       return;
299     case HWLOC_PCI_BUSID_LOWER:
300     case HWLOC_PCI_BUSID_SUPERSET: {
301       /* insert new before current */
302       new->next_sibling = *curp;
303       *curp = new;
304       new->parent = parent;
305       if (new->type == HWLOC_OBJ_BRIDGE) {
306 	/* look at remaining siblings and move some below new */
307 	childp = &new->io_first_child;
308 	curp = &new->next_sibling;
309 	while (*curp) {
310 	  hwloc_obj_t cur = *curp;
311 	  if (hwloc_pci_compare_busids(new, cur) == HWLOC_PCI_BUSID_LOWER) {
312 	    /* this sibling remains under root, after new. */
313 	    if (cur->attr->pcidev.domain > new->attr->pcidev.domain
314 		|| cur->attr->pcidev.bus > new->attr->bridge.downstream.pci.subordinate_bus)
315 	      /* this sibling is even above new's subordinate bus, no other sibling could go below new */
316 	      return;
317 	    curp = &cur->next_sibling;
318 	  } else {
319 	    /* this sibling goes under new */
320 	    *childp = cur;
321 	    *curp = cur->next_sibling;
322 	    (*childp)->parent = new;
323 	    (*childp)->next_sibling = NULL;
324 	    childp = &(*childp)->next_sibling;
325 	  }
326 	}
327       }
328       return;
329     }
330     case HWLOC_PCI_BUSID_EQUAL: {
331       static int reported = 0;
332       if (!reported && !hwloc_hide_errors()) {
333         fprintf(stderr, "*********************************************************\n");
334         fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION);
335         fprintf(stderr, "*\n");
336         fprintf(stderr, "* Trying to insert PCI object %04x:%02x:%02x.%01x at %04x:%02x:%02x.%01x\n",
337                 new->attr->pcidev.domain, new->attr->pcidev.bus, new->attr->pcidev.dev, new->attr->pcidev.func,
338                 (*curp)->attr->pcidev.domain, (*curp)->attr->pcidev.bus, (*curp)->attr->pcidev.dev, (*curp)->attr->pcidev.func);
339         fprintf(stderr, "*\n");
340         fprintf(stderr, "* hwloc will now ignore this object and continue.\n");
341         fprintf(stderr, "*********************************************************\n");
342         reported = 1;
343       }
344       hwloc_free_unlinked_object(new);
345       return;
346     }
347     }
348   }
349   /* add to the end of the list if higher than everybody */
350   new->parent = parent;
351   new->next_sibling = NULL;
352   *curp = new;
353 }
354 
/*
 * Insert obj in the PCI tree rooted at *treep, ordered by bus ID.
 * Objects at the top of the tree get a NULL parent.
 */
void
hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep,
                                   struct hwloc_obj *obj)
{
  struct hwloc_obj *no_parent = NULL; /* no parent on top of tree */

  hwloc_pci_add_object(no_parent, treep, obj);
}
361 
362 
363 /**********************
364  * Attaching PCI Trees
365  */
366 
367 static struct hwloc_obj *
hwloc_pcidisc_add_hostbridges(struct hwloc_topology * topology,struct hwloc_obj * old_tree)368 hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology,
369 			      struct hwloc_obj *old_tree)
370 {
371   struct hwloc_obj * new = NULL, **newp = &new;
372 
373   /*
374    * tree points to all objects connected to any upstream bus in the machine.
375    * We now create one real hostbridge object per upstream bus.
376    * It's not actually a PCI device so we have to create it.
377    */
378   while (old_tree) {
379     /* start a new host bridge */
380     struct hwloc_obj *hostbridge;
381     struct hwloc_obj **dstnextp;
382     struct hwloc_obj **srcnextp;
383     struct hwloc_obj *child;
384     unsigned current_domain;
385     unsigned char current_bus;
386     unsigned char current_subordinate;
387 
388     hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX);
389     if (!hostbridge) {
390       /* just queue remaining things without hostbridges and return */
391       *newp = old_tree;
392       return new;
393     }
394     dstnextp = &hostbridge->io_first_child;
395 
396     srcnextp = &old_tree;
397     child = *srcnextp;
398     current_domain = child->attr->pcidev.domain;
399     current_bus = child->attr->pcidev.bus;
400     current_subordinate = current_bus;
401 
402     hwloc_debug("Adding new PCI hostbridge %04x:%02x\n", current_domain, current_bus);
403 
404   next_child:
405     /* remove next child from tree */
406     *srcnextp = child->next_sibling;
407     /* append it to hostbridge */
408     *dstnextp = child;
409     child->parent = hostbridge;
410     child->next_sibling = NULL;
411     dstnextp = &child->next_sibling;
412 
413     /* compute hostbridge secondary/subordinate buses */
414     if (child->type == HWLOC_OBJ_BRIDGE
415 	&& child->attr->bridge.downstream.pci.subordinate_bus > current_subordinate)
416       current_subordinate = child->attr->bridge.downstream.pci.subordinate_bus;
417 
418     /* use next child if it has the same domains/bus */
419     child = *srcnextp;
420     if (child
421 	&& child->attr->pcidev.domain == current_domain
422 	&& child->attr->pcidev.bus == current_bus)
423       goto next_child;
424 
425     /* finish setting up this hostbridge */
426     hostbridge->attr->bridge.upstream_type = HWLOC_OBJ_BRIDGE_HOST;
427     hostbridge->attr->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI;
428     hostbridge->attr->bridge.downstream.pci.domain = current_domain;
429     hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus;
430     hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate;
431     hwloc_debug("  new PCI hostbridge covers %04x:[%02x-%02x]\n",
432 		current_domain, current_bus, current_subordinate);
433 
434     *newp = hostbridge;
435     newp = &hostbridge->next_sibling;
436   }
437 
438   return new;
439 }
440 
441 static struct hwloc_obj *
hwloc_pci_fixup_busid_parent(struct hwloc_topology * topology __hwloc_attribute_unused,struct hwloc_pcidev_attr_s * busid __hwloc_attribute_unused,struct hwloc_obj * parent __hwloc_attribute_unused)442 hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused,
443 			     struct hwloc_pcidev_attr_s *busid __hwloc_attribute_unused,
444 			     struct hwloc_obj *parent __hwloc_attribute_unused)
445 {
446   /* no quirk for now */
447   return parent;
448 }
449 
450 static struct hwloc_obj *
hwloc__pci_find_busid_parent(struct hwloc_topology * topology,struct hwloc_pcidev_attr_s * busid)451 hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcidev_attr_s *busid)
452 {
453   hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
454   hwloc_obj_t parent;
455   int forced = 0;
456   int noquirks = 0;
457   unsigned i;
458   int err;
459 
460   hwloc_debug("Looking for parent of PCI busid %04x:%02x:%02x.%01x\n",
461 	      busid->domain, busid->bus, busid->dev, busid->func);
462 
463   /* try to match a forced locality */
464   if (topology->pci_has_forced_locality) {
465     for(i=0; i<topology->pci_forced_locality_nr; i++) {
466       if (busid->domain == topology->pci_forced_locality[i].domain
467 	  && busid->bus >= topology->pci_forced_locality[i].bus_first
468 	  && busid->bus <= topology->pci_forced_locality[i].bus_last) {
469 	hwloc_bitmap_copy(cpuset, topology->pci_forced_locality[i].cpuset);
470 	forced = 1;
471 	break;
472       }
473     }
474     /* if pci locality was forced, even empty, don't let quirks change what the OS reports */
475     noquirks = 1;
476   }
477 
478   /* deprecated force locality variables */
479   if (!forced) {
480     const char *env;
481     char envname[256];
482     /* override the cpuset with the environment if given */
483     snprintf(envname, sizeof(envname), "HWLOC_PCI_%04x_%02x_LOCALCPUS",
484 	     busid->domain, busid->bus);
485     env = getenv(envname);
486     if (env) {
487       static int reported = 0;
488       if (!topology->pci_has_forced_locality && !reported) {
489 	fprintf(stderr, "Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env);
490 	reported = 1;
491       }
492       if (*env) {
493 	/* force the cpuset */
494 	hwloc_debug("Overriding PCI locality using %s in the environment\n", envname);
495 	hwloc_bitmap_sscanf(cpuset, env);
496 	forced = 1;
497       }
498       /* if env exists, even empty, don't let quirks change what the OS reports */
499       noquirks = 1;
500     }
501   }
502 
503   if (!forced) {
504     /* get the cpuset by asking the backend that provides the relevant hook, if any. */
505     struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend;
506     if (backend)
507       err = backend->get_pci_busid_cpuset(backend, busid, cpuset);
508     else
509       err = -1;
510     if (err < 0)
511       /* if we got nothing, assume this PCI bus is attached to the top of hierarchy */
512       hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology));
513   }
514 
515   hwloc_debug_bitmap("  will attach PCI bus to cpuset %s\n", cpuset);
516 
517   parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
518   if (parent) {
519     if (!noquirks)
520       /* We found a valid parent. Check that the OS didn't report invalid locality */
521       parent = hwloc_pci_fixup_busid_parent(topology, busid, parent);
522   } else {
523     /* Fallback to root */
524     parent = hwloc_get_root_obj(topology);
525   }
526 
527   hwloc_bitmap_free(cpuset);
528   return parent;
529 }
530 
531 int
hwloc_pcidisc_tree_attach(struct hwloc_topology * topology,struct hwloc_obj * tree)532 hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree)
533 {
534   enum hwloc_type_filter_e bfilter;
535 
536   if (!tree)
537     /* found nothing, exit */
538     return 0;
539 
540 #ifdef HWLOC_DEBUG
541   hwloc_debug("%s", "\nPCI hierarchy:\n");
542   hwloc_pci_traverse(NULL, tree, hwloc_pci_traverse_print_cb);
543   hwloc_debug("%s", "\n");
544 #endif
545 
546   bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE];
547   if (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE) {
548     tree = hwloc_pcidisc_add_hostbridges(topology, tree);
549   }
550 
551   while (tree) {
552     struct hwloc_obj *obj, *pciobj;
553     struct hwloc_obj *parent;
554     struct hwloc_pci_locality_s *loc;
555     unsigned domain, bus_min, bus_max;
556 
557     obj = tree;
558 
559     /* hostbridges don't have a PCI busid for looking up locality, use their first child */
560     if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST)
561       pciobj = obj->io_first_child;
562     else
563       pciobj = obj;
564     /* now we have a pci device or a pci bridge */
565     assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE
566 	   || (pciobj->type == HWLOC_OBJ_BRIDGE && pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI));
567 
568     if (obj->type == HWLOC_OBJ_BRIDGE) {
569       domain = obj->attr->bridge.downstream.pci.domain;
570       bus_min = obj->attr->bridge.downstream.pci.secondary_bus;
571       bus_max = obj->attr->bridge.downstream.pci.subordinate_bus;
572     } else {
573       domain = pciobj->attr->pcidev.domain;
574       bus_min = pciobj->attr->pcidev.bus;
575       bus_max = pciobj->attr->pcidev.bus;
576     }
577 
578     /* find where to attach that PCI bus */
579     parent = hwloc__pci_find_busid_parent(topology, &pciobj->attr->pcidev);
580 
581     /* reuse the previous locality if possible */
582     if (topology->last_pci_locality
583 	&& parent == topology->last_pci_locality->parent
584 	&& domain == topology->last_pci_locality->domain
585 	&& (bus_min == topology->last_pci_locality->bus_max
586 	    || bus_min == topology->last_pci_locality->bus_max+1)) {
587       hwloc_debug("  Reusing PCI locality up to bus %04x:%02x\n",
588 		  domain, bus_max);
589       topology->last_pci_locality->bus_max = bus_max;
590       goto done;
591     }
592 
593     loc = malloc(sizeof(*loc));
594     if (!loc) {
595       /* fallback to attaching to root */
596       parent = hwloc_get_root_obj(topology);
597       goto done;
598     }
599 
600     loc->domain = domain;
601     loc->bus_min = bus_min;
602     loc->bus_max = bus_max;
603     loc->parent = parent;
604     loc->cpuset = hwloc_bitmap_dup(parent->cpuset);
605     if (!loc->cpuset) {
606       /* fallback to attaching to root */
607       free(loc);
608       parent = hwloc_get_root_obj(topology);
609       goto done;
610     }
611 
612     hwloc_debug("Adding PCI locality %s P#%u for bus %04x:[%02x:%02x]\n",
613 		hwloc_obj_type_string(parent->type), parent->os_index, loc->domain, loc->bus_min, loc->bus_max);
614     if (topology->last_pci_locality) {
615       loc->prev = topology->last_pci_locality;
616       loc->next = NULL;
617       topology->last_pci_locality->next = loc;
618       topology->last_pci_locality = loc;
619     } else {
620       loc->prev = NULL;
621       loc->next = NULL;
622       topology->first_pci_locality = loc;
623       topology->last_pci_locality = loc;
624     }
625 
626   done:
627     /* dequeue this object */
628     tree = obj->next_sibling;
629     obj->next_sibling = NULL;
630     hwloc_insert_object_by_parent(topology, parent, obj);
631   }
632 
633   return 0;
634 }
635 
636 
637 /*********************************
638  * Finding PCI objects or parents
639  */
640 
641 struct hwloc_obj *
hwloc_pci_find_parent_by_busid(struct hwloc_topology * topology,unsigned domain,unsigned bus,unsigned dev,unsigned func)642 hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology,
643 			       unsigned domain, unsigned bus, unsigned dev, unsigned func)
644 {
645   struct hwloc_pcidev_attr_s busid;
646   hwloc_obj_t parent;
647 
648   /* try to find that exact busid */
649   parent = hwloc_pci_find_by_busid(topology, domain, bus, dev, func);
650   if (parent)
651     return parent;
652 
653   /* try to find the locality of that bus instead */
654   busid.domain = domain;
655   busid.bus = bus;
656   busid.dev = dev;
657   busid.func = func;
658   return hwloc__pci_find_busid_parent(topology, &busid);
659 }
660 
661 /* return the smallest object that contains the desired busid */
662 static struct hwloc_obj *
hwloc__pci_find_by_busid(hwloc_obj_t parent,unsigned domain,unsigned bus,unsigned dev,unsigned func)663 hwloc__pci_find_by_busid(hwloc_obj_t parent,
664 			 unsigned domain, unsigned bus, unsigned dev, unsigned func)
665 {
666   hwloc_obj_t child;
667 
668   for_each_io_child(child, parent) {
669     if (child->type == HWLOC_OBJ_PCI_DEVICE
670 	|| (child->type == HWLOC_OBJ_BRIDGE
671 	    && child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
672       if (child->attr->pcidev.domain == domain
673 	  && child->attr->pcidev.bus == bus
674 	  && child->attr->pcidev.dev == dev
675 	  && child->attr->pcidev.func == func)
676 	/* that's the right bus id */
677 	return child;
678       if (child->attr->pcidev.domain > domain
679 	  || (child->attr->pcidev.domain == domain
680 	      && child->attr->pcidev.bus > bus))
681 	/* bus id too high, won't find anything later, return parent */
682 	return parent;
683       if (child->type == HWLOC_OBJ_BRIDGE
684 	  && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
685 	  && child->attr->bridge.downstream.pci.domain == domain
686 	  && child->attr->bridge.downstream.pci.secondary_bus <= bus
687 	  && child->attr->bridge.downstream.pci.subordinate_bus >= bus)
688 	/* not the right bus id, but it's included in the bus below that bridge */
689 	return hwloc__pci_find_by_busid(child, domain, bus, dev, func);
690 
691     } else if (child->type == HWLOC_OBJ_BRIDGE
692 	       && child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI
693 	       && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
694 	       /* non-PCI to PCI bridge, just look at the subordinate bus */
695 	       && child->attr->bridge.downstream.pci.domain == domain
696 	       && child->attr->bridge.downstream.pci.secondary_bus <= bus
697 	       && child->attr->bridge.downstream.pci.subordinate_bus >= bus) {
698       /* contains our bus, recurse */
699       return hwloc__pci_find_by_busid(child, domain, bus, dev, func);
700     }
701   }
702   /* didn't find anything, return parent */
703   return parent;
704 }
705 
706 struct hwloc_obj *
hwloc_pci_find_by_busid(struct hwloc_topology * topology,unsigned domain,unsigned bus,unsigned dev,unsigned func)707 hwloc_pci_find_by_busid(struct hwloc_topology *topology,
708 			unsigned domain, unsigned bus, unsigned dev, unsigned func)
709 {
710   struct hwloc_pci_locality_s *loc;
711   hwloc_obj_t root = hwloc_get_root_obj(topology);
712   hwloc_obj_t parent = NULL;
713 
714   hwloc_debug("pcidisc looking for bus id %04x:%02x:%02x.%01x\n", domain, bus, dev, func);
715   loc = topology->first_pci_locality;
716   while (loc) {
717     if (loc->domain == domain && loc->bus_min <= bus && loc->bus_max >= bus) {
718       parent = loc->parent;
719       assert(parent);
720       hwloc_debug("  found pci locality for %04x:[%02x:%02x]\n",
721 		  loc->domain, loc->bus_min, loc->bus_max);
722       break;
723     }
724     loc = loc->next;
725   }
726   /* if we failed to insert localities, look at root too */
727   if (!parent)
728     parent = root;
729 
730   hwloc_debug("  looking for bus %04x:%02x:%02x.%01x below %s P#%u\n",
731 	      domain, bus, dev, func,
732 	      hwloc_obj_type_string(parent->type), parent->os_index);
733   parent = hwloc__pci_find_by_busid(parent, domain, bus, dev, func);
734   if (parent == root) {
735     hwloc_debug("  found nothing better than root object, ignoring\n");
736     return NULL;
737   } else {
738     if (parent->type == HWLOC_OBJ_PCI_DEVICE
739 	|| (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))
740       hwloc_debug("  found busid %04x:%02x:%02x.%01x\n",
741 		  parent->attr->pcidev.domain, parent->attr->pcidev.bus,
742 		  parent->attr->pcidev.dev, parent->attr->pcidev.func);
743     else
744       hwloc_debug("  found parent %s P#%u\n",
745 		  hwloc_obj_type_string(parent->type), parent->os_index);
746     return parent;
747   }
748 }
749 
750 
751 /*******************************
752  * Parsing the PCI Config Space
753  */
754 
/* offsets and bits used when walking the capability list of a PCI config space */
#define HWLOC_PCI_STATUS 0x06
#define HWLOC_PCI_STATUS_CAP_LIST 0x10
#define HWLOC_PCI_CAPABILITY_LIST 0x34
#define HWLOC_PCI_CAP_LIST_ID 0
#define HWLOC_PCI_CAP_LIST_NEXT 1

/*
 * Look for capability cap in the capability list of a PCI config space.
 *
 * config must hold the first 256 bytes of the config space.
 * Returns the offset of the capability, or 0 if the device has no
 * capability list or if cap is not in the list.
 */
unsigned
hwloc_pcidisc_find_cap(const unsigned char *config, unsigned cap)
{
  unsigned char visited[256] = { 0 };
  unsigned char pos; /* unsigned char to make sure we stay within the 256-byte config space */

  if ((config[HWLOC_PCI_STATUS] & HWLOC_PCI_STATUS_CAP_LIST) == 0)
    return 0;

  /* the two low bits of list pointers are masked out */
  pos = config[HWLOC_PCI_CAPABILITY_LIST] & ~3;

  /* stop on a 0 pointer, or when the list loops back to a visited entry */
  while (pos != 0 && !visited[pos]) {
    unsigned char capid = config[pos + HWLOC_PCI_CAP_LIST_ID];

    visited[pos] = 1;

    if (capid == cap)
      return pos;
    if (capid == 0xff) /* exit if id is 0xff */
      break;

    pos = config[pos + HWLOC_PCI_CAP_LIST_NEXT] & ~3;
  }

  return 0;
}
788 
/* Link Status register of the PCI Express capability, and its fields */
#define HWLOC_PCI_EXP_LNKSTA 0x12
#define HWLOC_PCI_EXP_LNKSTA_SPEED 0x000f
#define HWLOC_PCI_EXP_LNKSTA_WIDTH 0x03f0

/*
 * Compute the bandwidth of a PCI Express link from its Link Status register.
 *
 * config is the PCI config space and offset is the position of the PCI
 * Express capability inside it. The result is stored in *linkspeed, in GB/s.
 * Always returns 0.
 */
int
hwloc_pcidisc_find_linkspeed(const unsigned char *config,
                             unsigned offset, float *linkspeed)
{
  const unsigned char *reg = &config[offset + HWLOC_PCI_EXP_LNKSTA];
  unsigned linksta, speed, width;
  float lanespeed;

  /* Link Status is a 16-bit register and the config space is little-endian:
   * assemble it from its two bytes so that the result does not depend on the
   * host byte order and nothing is read beyond the register.
   */
  linksta = (unsigned) reg[0] | ((unsigned) reg[1] << 8);
  speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */
  width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */
  /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding    = 0.25GB/s data-rate per lane
   * PCIe Gen2 = 5  GT/s signal-rate per lane with 8/10 encoding    = 0.5 GB/s data-rate per lane
   * PCIe Gen3 = 8  GT/s signal-rate per lane with 128/130 encoding = 1   GB/s data-rate per lane
   * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2   GB/s data-rate per lane
   * PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4   GB/s data-rate per lane
   */

  /* lanespeed in Gbit/s */
  if (speed <= 2)
    lanespeed = 2.5f * speed * 0.8f;
  else
    lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */

  /* linkspeed in GB/s */
  *linkspeed = lanespeed * width / 8;
  return 0;
}
820 
821 #define HWLOC_PCI_HEADER_TYPE 0x0e
822 #define HWLOC_PCI_HEADER_TYPE_BRIDGE 1
823 #define HWLOC_PCI_CLASS_BRIDGE_PCI 0x0604
824 
825 hwloc_obj_type_t
hwloc_pcidisc_check_bridge_type(unsigned device_class,const unsigned char * config)826 hwloc_pcidisc_check_bridge_type(unsigned device_class, const unsigned char *config)
827 {
828   unsigned char headertype;
829 
830   if (device_class != HWLOC_PCI_CLASS_BRIDGE_PCI)
831     return HWLOC_OBJ_PCI_DEVICE;
832 
833   headertype = config[HWLOC_PCI_HEADER_TYPE] & 0x7f;
834   return (headertype == HWLOC_PCI_HEADER_TYPE_BRIDGE)
835     ? HWLOC_OBJ_BRIDGE : HWLOC_OBJ_PCI_DEVICE;
836 }
837 
/* bus number registers of a PCI-to-PCI bridge config space */
#define HWLOC_PCI_PRIMARY_BUS 0x18
#define HWLOC_PCI_SECONDARY_BUS 0x19
#define HWLOC_PCI_SUBORDINATE_BUS 0x1a

/*
 * Read the secondary and subordinate bus numbers of a bridge from its
 * config space.
 *
 * Returns 0 and fills *secondary_busp and *subordinate_busp when both are
 * above the bridge's own bus and secondary <= subordinate. Returns -1
 * without touching the outputs otherwise. domain, dev and func are only
 * used in debug messages.
 */
int
hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func,
                                unsigned *secondary_busp, unsigned *subordinate_busp,
                                const unsigned char *config)
{
  const unsigned first = config[HWLOC_PCI_SECONDARY_BUS];
  const unsigned last = config[HWLOC_PCI_SUBORDINATE_BUS];

  if (config[HWLOC_PCI_PRIMARY_BUS] != bus) {
    /* Sometimes the config space contains 00 instead of the actual primary bus number.
     * Always trust the bus ID because it was built by the system which has more information
     * to workaround such problems (e.g. ACPI information about PCI parent/children).
     */
    hwloc_debug("  %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n",
                domain, bus, dev, func, config[HWLOC_PCI_PRIMARY_BUS]);
  }

  if (bus < first && bus < last && first <= last) {
    *secondary_busp = first;
    *subordinate_busp = last;
    return 0;
  }

  /* This should catch most cases of invalid bridge information
   * (e.g. 00 for secondary and subordinate).
   * Ideally we would also check that [secondary-subordinate] is included
   * in the parent bridge [secondary+1:subordinate]. But that's hard to do
   * because objects may be discovered out of order (especially in the fsroot case).
   */
  hwloc_debug("  %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n",
              domain, bus, dev, func,
              first, last);
  return -1;
}
880 
881 
882 /****************
883  * Class Strings
884  */
885 
/*
 * Return a short human-readable name for a 16-bit PCI class ID.
 *
 * The high byte of class_id is the base class, the low byte is the subclass.
 * If the exact class ID is known, its specific name is returned (e.g. "Ethernet").
 * Otherwise the generic name of the base class is returned (e.g. "Network"),
 * or "Other" when the base class is unknown too.
 *
 * The result is always a non-NULL string literal, the caller must not modify or free it.
 */
const char *
hwloc_pci_class_string(unsigned short class_id)
{
  /* See https://pci-ids.ucw.cz/read/PD/ */
  switch ((class_id & 0xff00) >> 8) {
    case 0x00:
      /* unclassified devices have no generic name, unknown ones end up as "Other" */
      switch (class_id) {
	case 0x0001: return "VGA";
      }
      break;
    case 0x01:
      switch (class_id) {
	case 0x0100: return "SCSI";
	case 0x0101: return "IDE";
	case 0x0102: return "Floppy";
	case 0x0103: return "IPI";
	case 0x0104: return "RAID";
	case 0x0105: return "ATA";
	case 0x0106: return "SATA";
	case 0x0107: return "SAS";
	case 0x0108: return "NVMExp";
      }
      return "Storage";
    case 0x02:
      switch (class_id) {
	case 0x0200: return "Ethernet";
	case 0x0201: return "TokenRing";
	case 0x0202: return "FDDI";
	case 0x0203: return "ATM";
	case 0x0204: return "ISDN";
	case 0x0205: return "WorldFip";
	case 0x0206: return "PICMG";
	case 0x0207: return "InfiniBand";
	case 0x0208: return "Fabric";
      }
      return "Network";
    case 0x03:
      switch (class_id) {
	case 0x0300: return "VGA";
	case 0x0301: return "XGA";
	case 0x0302: return "3D";
      }
      return "Display";
    case 0x04:
      switch (class_id) {
	case 0x0400: return "MultimediaVideo";
	case 0x0401: return "MultimediaAudio";
	case 0x0402: return "Telephony";
	case 0x0403: return "AudioDevice";
      }
      return "Multimedia";
    case 0x05:
      switch (class_id) {
	case 0x0500: return "RAM";
	case 0x0501: return "Flash";
      }
      return "Memory";
    case 0x06:
      switch (class_id) {
	case 0x0600: return "HostBridge";
	case 0x0601: return "ISABridge";
	case 0x0602: return "EISABridge";
	case 0x0603: return "MicroChannelBridge";
	case 0x0604: return "PCIBridge";
	case 0x0605: return "PCMCIABridge";
	case 0x0606: return "NubusBridge";
	case 0x0607: return "CardBusBridge";
	case 0x0608: return "RACEwayBridge";
	case 0x0609: return "SemiTransparentPCIBridge";
	case 0x060a: return "InfiniBandPCIHostBridge";
      }
      return "Bridge";
    case 0x07:
      switch (class_id) {
	case 0x0700: return "Serial";
	case 0x0701: return "Parallel";
	case 0x0702: return "MultiportSerial";
	case 0x0703: return "Modem";
	case 0x0704: return "GPIB";
	case 0x0705: return "SmartCard";
      }
      return "Communication";
    case 0x08:
      switch (class_id) {
	case 0x0800: return "PIC";
	case 0x0801: return "DMA";
	case 0x0802: return "Timer";
	case 0x0803: return "RTC";
	case 0x0804: return "PCIHotPlug";
	case 0x0805: return "SDHost";
	case 0x0806: return "IOMMU";
      }
      return "SystemPeripheral";
    case 0x09:
      switch (class_id) {
	case 0x0900: return "Keyboard";
	case 0x0901: return "DigitizerPen";
	case 0x0902: return "Mouse";
	case 0x0903: return "Scanner";
	case 0x0904: return "Gameport";
      }
      return "Input";
    case 0x0a:
      return "DockingStation";
    case 0x0b:
      switch (class_id) {
	case 0x0b00: return "386";
	case 0x0b01: return "486";
	case 0x0b02: return "Pentium";
/* 0x0b03 and 0x0b04 might be Pentium and P6 ? */
	case 0x0b10: return "Alpha";
	case 0x0b20: return "PowerPC";
	case 0x0b30: return "MIPS";
	case 0x0b40: return "Co-Processor";
      }
      return "Processor";
    case 0x0c:
      switch (class_id) {
	case 0x0c00: return "FireWire";
	case 0x0c01: return "ACCESS";
	case 0x0c02: return "SSA";
	case 0x0c03: return "USB";
	case 0x0c04: return "FibreChannel";
	case 0x0c05: return "SMBus";
	case 0x0c06: return "InfiniBand";
	case 0x0c07: return "IPMI-SMIC";
	case 0x0c08: return "SERCOS";
	case 0x0c09: return "CANBUS";
      }
      return "SerialBus";
    case 0x0d:
      switch (class_id) {
	case 0x0d00: return "IRDA";
	case 0x0d01: return "ConsumerIR";
	case 0x0d10: return "RF";
	case 0x0d11: return "Bluetooth";
	case 0x0d12: return "Broadband";
	case 0x0d20: return "802.1a";
	case 0x0d21: return "802.1b";
      }
      return "Wireless";
    case 0x0e:
      switch (class_id) {
	case 0x0e00: return "I2O";
      }
      return "Intelligent";
    case 0x0f:
      return "Satellite";
    case 0x10:
      return "Encryption";
    case 0x11:
      return "SignalProcessing";
    case 0x12:
      return "ProcessingAccelerator";
    case 0x13:
      return "Instrumentation";
    case 0x40:
      return "Co-Processor";
  }
  /* unknown base class, or unknown unclassified device */
  return "Other";
}
1047