1 /*
2 * Copyright © 2009 CNRS
3 * Copyright © 2009-2021 Inria. All rights reserved.
4 * Copyright © 2009-2011, 2013 Université Bordeaux
5 * Copyright © 2014-2018 Cisco Systems, Inc. All rights reserved.
6 * Copyright © 2015 Research Organization for Information Science
7 * and Technology (RIST). All rights reserved.
8 * See COPYING in top-level directory.
9 */
10
11 #include "private/autogen/config.h"
12 #include "hwloc.h"
13 #include "hwloc/helper.h"
14 #include "hwloc/plugins.h"
15
16 /* private headers allowed for convenience because this plugin is built within hwloc */
17 #include "private/debug.h"
18 #include "private/misc.h"
19
20 #include <stdio.h>
21 #include <fcntl.h>
22 #include <string.h>
23 #include <assert.h>
24 #include <stdarg.h>
25 #ifdef HWLOC_LINUX_SYS
26 #include <dirent.h>
27 #endif
28
29 #include <pciaccess.h>
30
/*
 * Fallback definitions for the PCI config-space constants used below.
 * Each one is only defined when the headers included above did not already provide it.
 */

/* header type register and the value identifying a bridge */
#if !defined(PCI_HEADER_TYPE)
#define PCI_HEADER_TYPE 0x0e
#endif
#if !defined(PCI_HEADER_TYPE_BRIDGE)
#define PCI_HEADER_TYPE_BRIDGE 1
#endif

/* device class register and the class of PCI-to-PCI bridges */
#if !defined(PCI_CLASS_DEVICE)
#define PCI_CLASS_DEVICE 0x0a
#endif
#if !defined(PCI_CLASS_BRIDGE_PCI)
#define PCI_CLASS_BRIDGE_PCI 0x0604
#endif

/* revision register */
#if !defined(PCI_REVISION_ID)
#define PCI_REVISION_ID 0x08
#endif

/* subsystem vendor/device registers */
#if !defined(PCI_SUBSYSTEM_VENDOR_ID)
#define PCI_SUBSYSTEM_VENDOR_ID 0x2c
#endif
#if !defined(PCI_SUBSYSTEM_ID)
#define PCI_SUBSYSTEM_ID 0x2e
#endif

/* bridge bus number registers */
#if !defined(PCI_PRIMARY_BUS)
#define PCI_PRIMARY_BUS 0x18
#endif
#if !defined(PCI_SECONDARY_BUS)
#define PCI_SECONDARY_BUS 0x19
#endif
#if !defined(PCI_SUBORDINATE_BUS)
#define PCI_SUBORDINATE_BUS 0x1a
#endif

/* capability ID passed to hwloc_pcidisc_find_cap() to locate the PCI express block */
#if !defined(PCI_CAP_ID_EXP)
#define PCI_CAP_ID_EXP 0x10
#endif

#if !defined(PCI_CAP_NORMAL)
#define PCI_CAP_NORMAL 1
#endif

/* number of config-space bytes read and cached for each device */
#define CONFIG_SPACE_CACHESIZE 256
75
76 #ifdef HWLOC_WIN_SYS
77 #error pciaccess locking currently not implemented on Windows
78
79 #elif defined HWLOC_HAVE_PTHREAD_MUTEX
80 /* pthread mutex if available (except on windows) */
81 #include <pthread.h>
82 static pthread_mutex_t hwloc_pciaccess_mutex = PTHREAD_MUTEX_INITIALIZER;
83 #define HWLOC_PCIACCESS_LOCK() pthread_mutex_lock(&hwloc_pciaccess_mutex)
84 #define HWLOC_PCIACCESS_UNLOCK() pthread_mutex_unlock(&hwloc_pciaccess_mutex)
85
86 #else /* HWLOC_WIN_SYS || HWLOC_HAVE_PTHREAD_MUTEX */
87 #error No mutex implementation available
88 #endif
89
90 static void
hwloc_pci_get_obj_names(hwloc_obj_t obj,struct pci_id_match * m)91 hwloc_pci_get_obj_names(hwloc_obj_t obj, struct pci_id_match *m)
92 {
93 const char *vendorname, *devicename;
94 m->vendor_id = obj->attr->pcidev.vendor_id;
95 m->device_id = obj->attr->pcidev.device_id;
96 pci_get_strings(m, &devicename, &vendorname, NULL, NULL);
97 if (vendorname && *vendorname)
98 hwloc_obj_add_info(obj, "PCIVendor", vendorname);
99 if (devicename && *devicename)
100 hwloc_obj_add_info(obj, "PCIDevice", devicename);
101 }
102
103 static void
hwloc_pci_get_names(hwloc_topology_t topology)104 hwloc_pci_get_names(hwloc_topology_t topology)
105 {
106 hwloc_obj_t obj;
107 struct pci_id_match m;
108
109 /* we need the lists of PCI and bridges */
110 hwloc_topology_reconnect(topology, 0);
111
112 m.subvendor_id = PCI_MATCH_ANY;
113 m.subdevice_id = PCI_MATCH_ANY;
114 m.device_class = 0;
115 m.device_class_mask = 0;
116 m.match_data = 0;
117
118 HWLOC_PCIACCESS_LOCK();
119
120 obj = NULL;
121 while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL)
122 hwloc_pci_get_obj_names(obj, &m);
123
124 obj = NULL;
125 while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL)
126 if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)
127 hwloc_pci_get_obj_names(obj, &m);
128
129 HWLOC_PCIACCESS_UNLOCK();
130 }
131
132 static int
hwloc_look_pci(struct hwloc_backend * backend,struct hwloc_disc_status * dstatus)133 hwloc_look_pci(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
134 {
135 /*
136 * This backend uses the underlying OS.
137 * However we don't enforce topology->is_thissystem so that
138 * we may still force use this backend when debugging with !thissystem.
139 */
140
141 struct hwloc_topology *topology = backend->topology;
142 enum hwloc_type_filter_e pfilter, bfilter;
143 struct hwloc_obj *tree = NULL;
144 int ret;
145 struct pci_device_iterator *iter;
146 struct pci_device *pcidev;
147 struct pci_id_match m;
148
149 m.subvendor_id = PCI_MATCH_ANY;
150 m.subdevice_id = PCI_MATCH_ANY;
151 m.device_class = 0;
152 m.device_class_mask = 0;
153 m.match_data = 0;
154
155 hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &pfilter);
156 hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &bfilter);
157 if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE
158 && pfilter == HWLOC_TYPE_FILTER_KEEP_NONE)
159 return 0;
160
161 if (dstatus->phase == HWLOC_DISC_PHASE_ANNOTATE) {
162 hwloc_pci_get_names(topology);
163 return 0;
164 }
165 assert(dstatus->phase == HWLOC_DISC_PHASE_PCI);
166
167 hwloc_debug("%s", "\nScanning PCI buses...\n");
168
169 /* pciaccess isn't thread-safe. it uses a single global variable that doesn't have
170 * refcounting, and is dynamically reallocated when vendor/device names are needed, etc.
171 */
172 HWLOC_PCIACCESS_LOCK();
173
174 /* initialize PCI scanning */
175 ret = pci_system_init();
176 if (ret) {
177 HWLOC_PCIACCESS_UNLOCK();
178 if (!hwloc_hide_errors())
179 fprintf(stderr, "hwloc/pci: Failed to initialize libpciaccess with pci_system_init(): %d (%s)\n",
180 ret, strerror(errno));
181 return -1;
182 }
183
184 iter = pci_slot_match_iterator_create(NULL);
185
186 /* iterate over devices */
187 for (pcidev = pci_device_next(iter);
188 pcidev;
189 pcidev = pci_device_next(iter))
190 {
191 unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE];
192 hwloc_obj_type_t type;
193 struct hwloc_obj *obj;
194 unsigned domain, bus, dev, func;
195 unsigned secondary_bus, subordinate_bus;
196 unsigned device_class;
197 unsigned short tmp16;
198 unsigned offset;
199
200 domain = pcidev->domain;
201 bus = pcidev->bus;
202 dev = pcidev->dev;
203 func = pcidev->func;
204
205 #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN
206 if (domain > 0xffff) {
207 static int warned = 0;
208 if (!warned && hwloc_hide_errors() < 2)
209 fprintf(stderr, "hwloc/pci: Ignoring PCI device with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n");
210 warned = 1;
211 continue;
212 }
213 #endif
214
215 /* initialize the config space in case we fail to read it (missing permissions, etc). */
216 memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE);
217 pci_device_probe(pcidev);
218 pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL);
219
220 /* try to read the device_class */
221 device_class = pcidev->device_class >> 8;
222
223 /* bridge or pci dev? */
224 type = hwloc_pcidisc_check_bridge_type(device_class, config_space_cache);
225 /* only HWLOC_OBJ_BRIDGE for bridges to-PCI */
226 if (type == HWLOC_OBJ_BRIDGE) {
227 if (hwloc_pcidisc_find_bridge_buses(domain, bus, dev, func,
228 &secondary_bus, &subordinate_bus,
229 config_space_cache) < 0)
230 continue;
231 }
232
233 /* filtered? */
234 if (type == HWLOC_OBJ_PCI_DEVICE) {
235 enum hwloc_type_filter_e filter;
236 hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &filter);
237 if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
238 continue;
239 if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT
240 && !hwloc_filter_check_pcidev_subtype_important(device_class))
241 continue;
242 } else if (type == HWLOC_OBJ_BRIDGE) {
243 enum hwloc_type_filter_e filter;
244 hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &filter);
245 if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
246 continue;
247 /* HWLOC_TYPE_FILTER_KEEP_IMPORTANT filtered later in the core */
248 }
249
250 /* fixup SR-IOV buggy VF device/vendor IDs */
251 if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
252 /* SR-IOV puts ffff:ffff in Virtual Function config space.
253 * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
254 * VF and PF have the same vendor ID.
255 *
256 * libpciaccess just returns ffff:ffff, needs to be fixed.
257 * linuxpci is OK because sysfs files are already fixed in the kernel.
258 * (pciutils is OK when it uses those Linux sysfs files.)
259 *
260 * Reading these files is an easy way to work around the libpciaccess issue on Linux,
261 * but we have no way to know if this is caused by SR-IOV or not.
262 *
263 * TODO:
264 * If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0),
265 * look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)),
266 * then read the VF device ID after it (PCI_IOV_DID bytes later).
267 * Needs access to extended config space (needs root on Linux).
268 * TODO:
269 * Add string info attributes in VF and PF objects?
270 */
271 #ifdef HWLOC_LINUX_SYS
272 /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */
273 char path[64];
274 char value[16];
275 FILE *file;
276 size_t bytes_read;
277
278 snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor",
279 domain, bus, dev, func);
280 file = fopen(path, "r");
281 if (file) {
282 bytes_read = fread(value, 1, sizeof(value), file);
283 fclose(file);
284 if (bytes_read)
285 /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */
286 pcidev->vendor_id = strtoul(value, NULL, 16);
287 }
288
289 snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
290 domain, bus, dev, func);
291 file = fopen(path, "r");
292 if (file) {
293 bytes_read = fread(value, 1, sizeof(value), file);
294 fclose(file);
295 if (bytes_read)
296 /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */
297 pcidev->device_id = strtoul(value, NULL, 16);
298 }
299 #endif
300 }
301
302 obj = hwloc_alloc_setup_object(topology, type, HWLOC_UNKNOWN_INDEX);
303 obj->attr->pcidev.domain = domain;
304 obj->attr->pcidev.bus = bus;
305 obj->attr->pcidev.dev = dev;
306 obj->attr->pcidev.func = func;
307 obj->attr->pcidev.vendor_id = pcidev->vendor_id;
308 obj->attr->pcidev.device_id = pcidev->device_id;
309 obj->attr->pcidev.class_id = device_class;
310 obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];
311
312 /* bridge specific attributes */
313 if (type == HWLOC_OBJ_BRIDGE) {
314 /* assumes this is a Bridge to-PCI */
315 struct hwloc_bridge_attr_s *battr = &obj->attr->bridge;
316 battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI;
317 battr->downstream_type = HWLOC_OBJ_BRIDGE_PCI;
318 battr->downstream.pci.domain = domain;
319 battr->downstream.pci.secondary_bus = secondary_bus;
320 battr->downstream.pci.subordinate_bus = subordinate_bus;
321 }
322
323 obj->attr->pcidev.linkspeed = 0; /* unknown */
324 offset = hwloc_pcidisc_find_cap(config_space_cache, PCI_CAP_ID_EXP);
325
326 if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) {
327 hwloc_pcidisc_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);
328 #ifdef HWLOC_LINUX_SYS
329 } else {
330 /* if not available from config-space (extended part is root-only), look in Linux sysfs files added in 4.13 */
331 char path[128];
332 char value[16];
333 FILE *file;
334 size_t bytes_read;
335 float speed = 0.f;
336 unsigned width = 0;
337 snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/current_link_speed",
338 domain, bus, dev, func);
339 file = fopen(path, "r");
340 if (file) {
341 bytes_read = fread(value, 1, sizeof(value), file);
342 fclose(file);
343 if (bytes_read)
344 speed = hwloc_linux_pci_link_speed_from_string(value);
345 }
346 snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/current_link_width",
347 domain, bus, dev, func);
348 file = fopen(path, "r");
349 if (file) {
350 bytes_read = fread(value, 1, sizeof(value), file);
351 fclose(file);
352 if (bytes_read)
353 width = atoi(value);
354 }
355 obj->attr->pcidev.linkspeed = speed*width/8;
356 #endif
357 }
358
359 if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
360 memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16));
361 obj->attr->pcidev.subvendor_id = tmp16;
362 memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16));
363 obj->attr->pcidev.subdevice_id = tmp16;
364 } else {
365 /* TODO:
366 * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID
367 * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID
368 */
369 }
370
371 hwloc_debug(" %04x:%02x:%02x.%01x %04x %04x:%04x\n",
372 domain, bus, dev, func,
373 device_class, pcidev->vendor_id, pcidev->device_id);
374
375 hwloc_pci_get_obj_names(obj, &m);
376 hwloc_pcidisc_tree_insert_by_busid(&tree, obj);
377 }
378
379 /* finalize device scanning */
380 pci_iterator_destroy(iter);
381 pci_system_cleanup();
382 HWLOC_PCIACCESS_UNLOCK();
383
384 hwloc_pcidisc_tree_attach(topology, tree);
385
386 /* no need to run another PCI phase */
387 dstatus->excluded_phases |= HWLOC_DISC_PHASE_PCI;
388 /* no need to run the annotate phase, we did it above */
389 backend->phases &= HWLOC_DISC_PHASE_ANNOTATE;
390 return 0;
391 }
392
393 static struct hwloc_backend *
hwloc_pci_component_instantiate(struct hwloc_topology * topology,struct hwloc_disc_component * component,unsigned excluded_phases __hwloc_attribute_unused,const void * _data1 __hwloc_attribute_unused,const void * _data2 __hwloc_attribute_unused,const void * _data3 __hwloc_attribute_unused)394 hwloc_pci_component_instantiate(struct hwloc_topology *topology,
395 struct hwloc_disc_component *component,
396 unsigned excluded_phases __hwloc_attribute_unused,
397 const void *_data1 __hwloc_attribute_unused,
398 const void *_data2 __hwloc_attribute_unused,
399 const void *_data3 __hwloc_attribute_unused)
400 {
401 struct hwloc_backend *backend;
402
403 backend = hwloc_backend_alloc(topology, component);
404 if (!backend)
405 return NULL;
406 backend->discover = hwloc_look_pci;
407
408 #ifdef HWLOC_SOLARIS_SYS
409 if ((uid_t)0 != geteuid())
410 backend->phases &= ~HWLOC_DISC_PHASE_PCI;
411 #endif
412
413 return backend;
414 }
415
416 static struct hwloc_disc_component hwloc_pci_disc_component = {
417 "pci",
418 HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_ANNOTATE,
419 HWLOC_DISC_PHASE_GLOBAL,
420 hwloc_pci_component_instantiate,
421 20,
422 1,
423 NULL
424 };
425
/*
 * Initialization callback of the pci component.
 *
 * Fails if any flag is given, or if the plugin namespace check
 * for the "hwloc_backend_alloc" symbol fails.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
hwloc_pci_component_init(unsigned long flags)
{
  /* no flag is supported; the namespace check is skipped in that case */
  if (flags != 0)
    return -1;

  return (hwloc_plugin_check_namespace("pci", "hwloc_backend_alloc") < 0) ? -1 : 0;
}
435
436 #ifdef HWLOC_INSIDE_PLUGIN
437 HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;
438 #endif
439
440 const struct hwloc_component hwloc_pci_component = {
441 HWLOC_COMPONENT_ABI,
442 hwloc_pci_component_init, NULL,
443 HWLOC_COMPONENT_TYPE_DISC,
444 0,
445 &hwloc_pci_disc_component
446 };
447