1 /*
2  * Copyright © 2009 CNRS
3  * Copyright © 2009-2021 Inria.  All rights reserved.
4  * Copyright © 2009-2011, 2013 Université Bordeaux
5  * Copyright © 2014-2018 Cisco Systems, Inc.  All rights reserved.
6  * Copyright © 2015      Research Organization for Information Science
7  *                       and Technology (RIST). All rights reserved.
8  * See COPYING in top-level directory.
9  */
10 
11 #include "private/autogen/config.h"
12 #include "hwloc.h"
13 #include "hwloc/helper.h"
14 #include "hwloc/plugins.h"
15 
16 /* private headers allowed for convenience because this plugin is built within hwloc */
17 #include "private/debug.h"
18 #include "private/misc.h"
19 
20 #include <stdio.h>
21 #include <fcntl.h>
22 #include <string.h>
23 #include <assert.h>
24 #include <stdarg.h>
25 #ifdef HWLOC_LINUX_SYS
26 #include <dirent.h>
27 #endif
28 
29 #include <pciaccess.h>
30 
/* Fallback definitions of the PCI configuration-space constants used below.
 * Each one is only defined here when no previously included header provided it.
 */

/* header type register and the value checked for bridges */
#ifndef PCI_HEADER_TYPE
#define PCI_HEADER_TYPE 0x0e
#endif
#ifndef PCI_HEADER_TYPE_BRIDGE
#define PCI_HEADER_TYPE_BRIDGE 1
#endif

/* device class register and the PCI-to-PCI bridge class code */
#ifndef PCI_CLASS_DEVICE
#define PCI_CLASS_DEVICE 0x0a
#endif
#ifndef PCI_CLASS_BRIDGE_PCI
#define PCI_CLASS_BRIDGE_PCI 0x0604
#endif

/* revision byte, read from the cached config space in hwloc_look_pci() */
#ifndef PCI_REVISION_ID
#define PCI_REVISION_ID 0x08
#endif

/* subsystem vendor/device IDs, read from the cached config space for non-bridge devices */
#ifndef PCI_SUBSYSTEM_VENDOR_ID
#define PCI_SUBSYSTEM_VENDOR_ID 0x2c
#endif
#ifndef PCI_SUBSYSTEM_ID
#define PCI_SUBSYSTEM_ID 0x2e
#endif

/* bus number registers of a bridge */
#ifndef PCI_PRIMARY_BUS
#define PCI_PRIMARY_BUS 0x18
#endif
#ifndef PCI_SECONDARY_BUS
#define PCI_SECONDARY_BUS 0x19
#endif
#ifndef PCI_SUBORDINATE_BUS
#define PCI_SUBORDINATE_BUS 0x1a
#endif

/* capability ID passed to hwloc_pcidisc_find_cap() to locate the PCI Express block */
#ifndef PCI_CAP_ID_EXP
#define PCI_CAP_ID_EXP 0x10
#endif

/* NOTE(review): not referenced in the visible part of this file — confirm whether it is still needed */
#ifndef PCI_CAP_NORMAL
#define PCI_CAP_NORMAL 1
#endif

/* number of config-space bytes cached per device in hwloc_look_pci() */
#define CONFIG_SPACE_CACHESIZE 256
75 
/* Process-wide lock taken around every use of libpciaccess in this file
 * (see hwloc_pci_get_names() and hwloc_look_pci()).
 * Only a pthread-mutex implementation exists; any other configuration
 * is rejected at compile time by the #error directives below.
 */
#ifdef HWLOC_WIN_SYS
#error pciaccess locking currently not implemented on Windows

#elif defined HWLOC_HAVE_PTHREAD_MUTEX
/* pthread mutex if available (except on windows) */
#include <pthread.h>
static pthread_mutex_t hwloc_pciaccess_mutex = PTHREAD_MUTEX_INITIALIZER;
#define HWLOC_PCIACCESS_LOCK() pthread_mutex_lock(&hwloc_pciaccess_mutex)
#define HWLOC_PCIACCESS_UNLOCK() pthread_mutex_unlock(&hwloc_pciaccess_mutex)

#else /* HWLOC_WIN_SYS || HWLOC_HAVE_PTHREAD_MUTEX */
/* neither Windows nor pthread mutexes: no locking primitive to build on */
#error No mutex implementation available
#endif
89 
90 static void
hwloc_pci_get_obj_names(hwloc_obj_t obj,struct pci_id_match * m)91 hwloc_pci_get_obj_names(hwloc_obj_t obj, struct pci_id_match *m)
92 {
93   const char *vendorname, *devicename;
94   m->vendor_id = obj->attr->pcidev.vendor_id;
95   m->device_id = obj->attr->pcidev.device_id;
96   pci_get_strings(m, &devicename, &vendorname, NULL, NULL);
97   if (vendorname && *vendorname)
98     hwloc_obj_add_info(obj, "PCIVendor", vendorname);
99   if (devicename && *devicename)
100     hwloc_obj_add_info(obj, "PCIDevice", devicename);
101 }
102 
103 static void
hwloc_pci_get_names(hwloc_topology_t topology)104 hwloc_pci_get_names(hwloc_topology_t topology)
105 {
106   hwloc_obj_t obj;
107   struct pci_id_match m;
108 
109   /* we need the lists of PCI and bridges */
110   hwloc_topology_reconnect(topology, 0);
111 
112   m.subvendor_id = PCI_MATCH_ANY;
113   m.subdevice_id = PCI_MATCH_ANY;
114   m.device_class = 0;
115   m.device_class_mask = 0;
116   m.match_data = 0;
117 
118   HWLOC_PCIACCESS_LOCK();
119 
120   obj = NULL;
121   while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL)
122     hwloc_pci_get_obj_names(obj, &m);
123 
124   obj = NULL;
125   while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL)
126     if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)
127       hwloc_pci_get_obj_names(obj, &m);
128 
129   HWLOC_PCIACCESS_UNLOCK();
130 }
131 
132 static int
hwloc_look_pci(struct hwloc_backend * backend,struct hwloc_disc_status * dstatus)133 hwloc_look_pci(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
134 {
135   /*
136    * This backend uses the underlying OS.
137    * However we don't enforce topology->is_thissystem so that
138    * we may still force use this backend when debugging with !thissystem.
139    */
140 
141   struct hwloc_topology *topology = backend->topology;
142   enum hwloc_type_filter_e pfilter, bfilter;
143   struct hwloc_obj *tree = NULL;
144   int ret;
145   struct pci_device_iterator *iter;
146   struct pci_device *pcidev;
147   struct pci_id_match m;
148 
149   m.subvendor_id = PCI_MATCH_ANY;
150   m.subdevice_id = PCI_MATCH_ANY;
151   m.device_class = 0;
152   m.device_class_mask = 0;
153   m.match_data = 0;
154 
155   hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &pfilter);
156   hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &bfilter);
157   if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE
158       && pfilter == HWLOC_TYPE_FILTER_KEEP_NONE)
159     return 0;
160 
161   if (dstatus->phase == HWLOC_DISC_PHASE_ANNOTATE) {
162     hwloc_pci_get_names(topology);
163     return 0;
164   }
165   assert(dstatus->phase == HWLOC_DISC_PHASE_PCI);
166 
167   hwloc_debug("%s", "\nScanning PCI buses...\n");
168 
169   /* pciaccess isn't thread-safe. it uses a single global variable that doesn't have
170    * refcounting, and is dynamically reallocated when vendor/device names are needed, etc.
171    */
172   HWLOC_PCIACCESS_LOCK();
173 
174   /* initialize PCI scanning */
175   ret = pci_system_init();
176   if (ret) {
177     HWLOC_PCIACCESS_UNLOCK();
178     if (!hwloc_hide_errors())
179       fprintf(stderr, "hwloc/pci: Failed to initialize libpciaccess with pci_system_init(): %d (%s)\n",
180               ret, strerror(errno));
181     return -1;
182   }
183 
184   iter = pci_slot_match_iterator_create(NULL);
185 
186   /* iterate over devices */
187   for (pcidev = pci_device_next(iter);
188        pcidev;
189        pcidev = pci_device_next(iter))
190   {
191     unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE];
192     hwloc_obj_type_t type;
193     struct hwloc_obj *obj;
194     unsigned domain, bus, dev, func;
195     unsigned secondary_bus, subordinate_bus;
196     unsigned device_class;
197     unsigned short tmp16;
198     unsigned offset;
199 
200     domain = pcidev->domain;
201     bus = pcidev->bus;
202     dev = pcidev->dev;
203     func = pcidev->func;
204 
205 #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN
206     if (domain > 0xffff) {
207       static int warned = 0;
208       if (!warned && hwloc_hide_errors() < 2)
209 	fprintf(stderr, "hwloc/pci: Ignoring PCI device with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n");
210       warned = 1;
211       continue;
212     }
213 #endif
214 
215     /* initialize the config space in case we fail to read it (missing permissions, etc). */
216     memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE);
217     pci_device_probe(pcidev);
218     pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL);
219 
220     /* try to read the device_class */
221     device_class = pcidev->device_class >> 8;
222 
223     /* bridge or pci dev? */
224     type = hwloc_pcidisc_check_bridge_type(device_class, config_space_cache);
225     /* only HWLOC_OBJ_BRIDGE for bridges to-PCI */
226     if (type == HWLOC_OBJ_BRIDGE) {
227       if (hwloc_pcidisc_find_bridge_buses(domain, bus, dev, func,
228 					  &secondary_bus, &subordinate_bus,
229 					  config_space_cache) < 0)
230 	continue;
231     }
232 
233     /* filtered? */
234     if (type == HWLOC_OBJ_PCI_DEVICE) {
235       enum hwloc_type_filter_e filter;
236       hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &filter);
237       if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
238 	continue;
239       if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT
240 	  && !hwloc_filter_check_pcidev_subtype_important(device_class))
241 	continue;
242     } else if (type == HWLOC_OBJ_BRIDGE) {
243       enum hwloc_type_filter_e filter;
244       hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &filter);
245       if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
246 	continue;
247       /* HWLOC_TYPE_FILTER_KEEP_IMPORTANT filtered later in the core */
248     }
249 
250     /* fixup SR-IOV buggy VF device/vendor IDs */
251     if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
252       /* SR-IOV puts ffff:ffff in Virtual Function config space.
253        * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
254        * VF and PF have the same vendor ID.
255        *
256        * libpciaccess just returns ffff:ffff, needs to be fixed.
257        * linuxpci is OK because sysfs files are already fixed in the kernel.
258        * (pciutils is OK when it uses those Linux sysfs files.)
259        *
260        * Reading these files is an easy way to work around the libpciaccess issue on Linux,
261        * but we have no way to know if this is caused by SR-IOV or not.
262        *
263        * TODO:
264        *  If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0),
265        *  look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)),
266        *  then read the VF device ID after it (PCI_IOV_DID bytes later).
267        *  Needs access to extended config space (needs root on Linux).
268        * TODO:
269        *  Add string info attributes in VF and PF objects?
270        */
271 #ifdef HWLOC_LINUX_SYS
272       /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */
273       char path[64];
274       char value[16];
275       FILE *file;
276       size_t bytes_read;
277 
278       snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor",
279 	       domain, bus, dev, func);
280       file = fopen(path, "r");
281       if (file) {
282 	bytes_read = fread(value, 1, sizeof(value), file);
283 	fclose(file);
284 	if (bytes_read)
285 	  /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */
286           pcidev->vendor_id = strtoul(value, NULL, 16);
287       }
288 
289       snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
290 	       domain, bus, dev, func);
291       file = fopen(path, "r");
292       if (file) {
293 	bytes_read = fread(value, 1, sizeof(value), file);
294 	fclose(file);
295 	if (bytes_read)
296 	  /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */
297           pcidev->device_id = strtoul(value, NULL, 16);
298       }
299 #endif
300     }
301 
302     obj = hwloc_alloc_setup_object(topology, type, HWLOC_UNKNOWN_INDEX);
303     obj->attr->pcidev.domain = domain;
304     obj->attr->pcidev.bus = bus;
305     obj->attr->pcidev.dev = dev;
306     obj->attr->pcidev.func = func;
307     obj->attr->pcidev.vendor_id = pcidev->vendor_id;
308     obj->attr->pcidev.device_id = pcidev->device_id;
309     obj->attr->pcidev.class_id = device_class;
310     obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];
311 
312     /* bridge specific attributes */
313     if (type == HWLOC_OBJ_BRIDGE) {
314       /* assumes this is a Bridge to-PCI */
315       struct hwloc_bridge_attr_s *battr = &obj->attr->bridge;
316       battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI;
317       battr->downstream_type = HWLOC_OBJ_BRIDGE_PCI;
318       battr->downstream.pci.domain = domain;
319       battr->downstream.pci.secondary_bus = secondary_bus;
320       battr->downstream.pci.subordinate_bus = subordinate_bus;
321     }
322 
323     obj->attr->pcidev.linkspeed = 0; /* unknown */
324     offset = hwloc_pcidisc_find_cap(config_space_cache, PCI_CAP_ID_EXP);
325 
326     if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) {
327       hwloc_pcidisc_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);
328 #ifdef HWLOC_LINUX_SYS
329     } else {
330       /* if not available from config-space (extended part is root-only), look in Linux sysfs files added in 4.13 */
331       char path[128];
332       char value[16];
333       FILE *file;
334       size_t bytes_read;
335       float speed = 0.f;
336       unsigned width = 0;
337       snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/current_link_speed",
338 	       domain, bus, dev, func);
339       file = fopen(path, "r");
340       if (file) {
341 	bytes_read = fread(value, 1, sizeof(value), file);
342 	fclose(file);
343 	if (bytes_read)
344 	  speed = hwloc_linux_pci_link_speed_from_string(value);
345       }
346       snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/current_link_width",
347 	       domain, bus, dev, func);
348       file = fopen(path, "r");
349       if (file) {
350 	bytes_read = fread(value, 1, sizeof(value), file);
351 	fclose(file);
352 	if (bytes_read)
353 	  width = atoi(value);
354       }
355       obj->attr->pcidev.linkspeed = speed*width/8;
356 #endif
357     }
358 
359     if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
360       memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16));
361       obj->attr->pcidev.subvendor_id = tmp16;
362       memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16));
363       obj->attr->pcidev.subdevice_id = tmp16;
364     } else {
365       /* TODO:
366        * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID
367        * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID
368        */
369     }
370 
371     hwloc_debug("  %04x:%02x:%02x.%01x %04x %04x:%04x\n",
372 		domain, bus, dev, func,
373 		device_class, pcidev->vendor_id, pcidev->device_id);
374 
375     hwloc_pci_get_obj_names(obj, &m);
376     hwloc_pcidisc_tree_insert_by_busid(&tree, obj);
377   }
378 
379   /* finalize device scanning */
380   pci_iterator_destroy(iter);
381   pci_system_cleanup();
382   HWLOC_PCIACCESS_UNLOCK();
383 
384   hwloc_pcidisc_tree_attach(topology, tree);
385 
386   /* no need to run another PCI phase */
387   dstatus->excluded_phases |= HWLOC_DISC_PHASE_PCI;
388   /* no need to run the annotate phase, we did it above */
389   backend->phases &= HWLOC_DISC_PHASE_ANNOTATE;
390   return 0;
391 }
392 
393 static struct hwloc_backend *
hwloc_pci_component_instantiate(struct hwloc_topology * topology,struct hwloc_disc_component * component,unsigned excluded_phases __hwloc_attribute_unused,const void * _data1 __hwloc_attribute_unused,const void * _data2 __hwloc_attribute_unused,const void * _data3 __hwloc_attribute_unused)394 hwloc_pci_component_instantiate(struct hwloc_topology *topology,
395 				struct hwloc_disc_component *component,
396 				unsigned excluded_phases __hwloc_attribute_unused,
397 				const void *_data1 __hwloc_attribute_unused,
398 				const void *_data2 __hwloc_attribute_unused,
399 				const void *_data3 __hwloc_attribute_unused)
400 {
401   struct hwloc_backend *backend;
402 
403   backend = hwloc_backend_alloc(topology, component);
404   if (!backend)
405     return NULL;
406   backend->discover = hwloc_look_pci;
407 
408 #ifdef HWLOC_SOLARIS_SYS
409   if ((uid_t)0 != geteuid())
410     backend->phases &= ~HWLOC_DISC_PHASE_PCI;
411 #endif
412 
413   return backend;
414 }
415 
/* Discovery component description of the "pci" (libpciaccess) backend.
 * NOTE(review): the initializer is positional; the per-field notes below assume the
 * layout of struct hwloc_disc_component from hwloc/plugins.h — confirm against that header.
 */
static struct hwloc_disc_component hwloc_pci_disc_component = {
  "pci", /* component name */
  HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_ANNOTATE, /* phases handled by hwloc_look_pci() */
  HWLOC_DISC_PHASE_GLOBAL, /* presumably the phases that exclude this component */
  hwloc_pci_component_instantiate, /* backend constructor */
  20, /* presumably the component priority */
  1, /* presumably "enabled by default" */
  NULL /* presumably the link to the next component, set by the core */
};
425 
/* Component initialization hook.
 *
 * Fails (-1) when any flag is given, or when hwloc_plugin_check_namespace()
 * reports an error for the "hwloc_backend_alloc" symbol. Returns 0 otherwise.
 */
static int
hwloc_pci_component_init(unsigned long flags)
{
  /* the namespace check is only attempted when no flag was passed */
  if (flags != 0
      || hwloc_plugin_check_namespace("pci", "hwloc_backend_alloc") < 0)
    return -1;

  return 0;
}
435 
/* When built as a plugin, declare the component symbol with HWLOC_DECLSPEC before defining it. */
#ifdef HWLOC_INSIDE_PLUGIN
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;
#endif

/* Generic component wrapper around the "pci" discovery component above.
 * NOTE(review): the initializer is positional; the per-field notes below assume the
 * layout of struct hwloc_component from hwloc/plugins.h — confirm against that header.
 */
const struct hwloc_component hwloc_pci_component = {
  HWLOC_COMPONENT_ABI, /* ABI version this component was built against */
  hwloc_pci_component_init, NULL, /* init callback; presumably no finalize callback */
  HWLOC_COMPONENT_TYPE_DISC, /* this is a discovery component */
  0, /* presumably flags, none set */
  &hwloc_pci_disc_component /* the discovery component description */
};
447