1 /*
2  * Copyright © 2009 CNRS
3  * Copyright © 2009-2019 Inria.  All rights reserved.
4  * Copyright © 2009-2011, 2013 Université Bordeaux
5  * Copyright © 2014-2018 Cisco Systems, Inc.  All rights reserved.
6  * Copyright © 2015      Research Organization for Information Science
7  *                       and Technology (RIST). All rights reserved.
8  * See COPYING in top-level directory.
9  */
10 
11 #include <private/autogen/config.h>
12 #include <hwloc.h>
13 #include <hwloc/helper.h>
14 #include <hwloc/plugins.h>
15 
16 /* private headers allowed for convenience because this plugin is built within hwloc */
17 #include <private/debug.h>
18 #include <private/misc.h>
19 
20 #include <stdio.h>
21 #include <fcntl.h>
22 #include <string.h>
23 #include <assert.h>
24 #include <stdarg.h>
25 #ifdef HWLOC_LINUX_SYS
26 #include <dirent.h>
27 #endif
28 
29 #include <pciaccess.h>
30 
/*
 * Fallback values for PCI constants.
 * Each macro is only defined here when nothing included earlier already
 * provides it, so definitions coming from system headers always win.
 */

/* header type */
#if !defined(PCI_HEADER_TYPE)
#define PCI_HEADER_TYPE 0x0e
#endif
#if !defined(PCI_HEADER_TYPE_BRIDGE)
#define PCI_HEADER_TYPE_BRIDGE 1
#endif

/* device class */
#if !defined(PCI_CLASS_DEVICE)
#define PCI_CLASS_DEVICE 0x0a
#endif
#if !defined(PCI_CLASS_BRIDGE_PCI)
#define PCI_CLASS_BRIDGE_PCI 0x0604
#endif

/* used below as a byte index into the cached config space */
#if !defined(PCI_REVISION_ID)
#define PCI_REVISION_ID 0x08
#endif

/* used below as byte indexes of 16-bit values in the cached config space */
#if !defined(PCI_SUBSYSTEM_VENDOR_ID)
#define PCI_SUBSYSTEM_VENDOR_ID 0x2c
#endif
#if !defined(PCI_SUBSYSTEM_ID)
#define PCI_SUBSYSTEM_ID 0x2e
#endif

/* bus numbers */
#if !defined(PCI_PRIMARY_BUS)
#define PCI_PRIMARY_BUS 0x18
#endif
#if !defined(PCI_SECONDARY_BUS)
#define PCI_SECONDARY_BUS 0x19
#endif
#if !defined(PCI_SUBORDINATE_BUS)
#define PCI_SUBORDINATE_BUS 0x1a
#endif

/* capability identifier passed to hwloc_pci_find_cap() below */
#if !defined(PCI_CAP_ID_EXP)
#define PCI_CAP_ID_EXP 0x10
#endif

#if !defined(PCI_CAP_NORMAL)
#define PCI_CAP_NORMAL 1
#endif

/* number of config-space bytes read and cached for each device */
#define CONFIG_SPACE_CACHESIZE 256
75 
76 #ifdef HWLOC_WIN_SYS
77 #error pciaccess locking currently not implemented on Windows
78 
79 #elif defined HWLOC_HAVE_PTHREAD_MUTEX
80 /* pthread mutex if available (except on windows) */
81 #include <pthread.h>
82 static pthread_mutex_t hwloc_pciaccess_mutex = PTHREAD_MUTEX_INITIALIZER;
83 #define HWLOC_PCIACCESS_LOCK() pthread_mutex_lock(&hwloc_pciaccess_mutex)
84 #define HWLOC_PCIACCESS_UNLOCK() pthread_mutex_unlock(&hwloc_pciaccess_mutex)
85 
86 #else /* HWLOC_WIN_SYS || HWLOC_HAVE_PTHREAD_MUTEX */
87 #error No mutex implementation available
88 #endif
89 
90 static int
hwloc_look_pci(struct hwloc_backend * backend)91 hwloc_look_pci(struct hwloc_backend *backend)
92 {
93   struct hwloc_topology *topology = backend->topology;
94   struct hwloc_obj *first_obj = NULL, *last_obj = NULL;
95   int ret;
96   struct pci_device_iterator *iter;
97   struct pci_device *pcidev;
98 #ifdef HWLOC_LINUX_SYS
99   DIR *dir;
100 #endif
101 
102   if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)))
103     return 0;
104 
105   if (hwloc_get_next_pcidev(topology, NULL)) {
106     hwloc_debug("%s", "PCI objects already added, ignoring pci backend.\n");
107     return 0;
108   }
109 
110   if (!hwloc_topology_is_thissystem(topology)) {
111     hwloc_debug("%s", "\nno PCI detection (not thissystem)\n");
112     return 0;
113   }
114 
115   hwloc_debug("%s", "\nScanning PCI buses...\n");
116 
117   /* pciaccess isn't thread-safe. it uses a single global variable that doesn't have
118    * refcounting, and is dynamically reallocated when vendor/device names are needed, etc.
119    */
120   HWLOC_PCIACCESS_LOCK();
121 
122   /* initialize PCI scanning */
123   ret = pci_system_init();
124   if (ret) {
125     HWLOC_PCIACCESS_UNLOCK();
126     hwloc_debug("%s", "Can not initialize libpciaccess\n");
127     return -1;
128   }
129 
130   iter = pci_slot_match_iterator_create(NULL);
131 
132   /* iterate over devices */
133   for (pcidev = pci_device_next(iter);
134        pcidev;
135        pcidev = pci_device_next(iter))
136   {
137     const char *vendorname, *devicename, *fullname;
138     unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE];
139     struct hwloc_obj *obj;
140     unsigned os_index;
141     unsigned domain;
142     unsigned device_class;
143     unsigned short tmp16;
144     char name[128];
145     unsigned offset;
146 
147     /* initialize the config space in case we fail to read it (missing permissions, etc). */
148     memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE);
149     pci_device_probe(pcidev);
150     pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL);
151 
152     /* try to read the domain */
153     domain = pcidev->domain;
154     if (domain > 0xffff) {
155       static int warned = 0;
156       if (!warned)
157 	fprintf(stderr, "Ignoring PCI device with non-16bit domain\n");
158       warned = 1;
159       continue;
160     }
161 
162     /* try to read the device_class */
163     device_class = pcidev->device_class >> 8;
164 
165     /* fixup SR-IOV buggy VF device/vendor IDs */
166     if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
167       /* SR-IOV puts ffff:ffff in Virtual Function config space.
168        * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
169        * VF and PF have the same vendor ID.
170        *
171        * libpciaccess just returns ffff:ffff, needs to be fixed.
172        * linuxpci is OK because sysfs files are already fixed the kernel.
173        * (pciutils is OK when it uses those Linux sysfs files.)
174        *
175        * Reading these files is an easy way to work around the libpciaccess issue on Linux,
176        * but we have no way to know if this is caused by SR-IOV or not.
177        *
178        * TODO:
179        *  If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0),
180        *  look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)),
181        *  then read the VF device ID after it (PCI_IOV_DID bytes later).
182        *  Needs access to extended config space (needs root on Linux).
183        * TODO:
184        *  Add string info attributes in VF and PF objects?
185        */
186 #ifdef HWLOC_LINUX_SYS
187       /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */
188       char path[64];
189       char value[16];
190       FILE *file;
191       size_t bytes_read;
192 
193       snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor",
194 	       domain, pcidev->bus, pcidev->dev, pcidev->func);
195       file = fopen(path, "r");
196       if (file) {
197 	bytes_read = fread(value, 1, sizeof(value), file);
198 	fclose(file);
199 	if (bytes_read)
200 	  /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */
201           pcidev->vendor_id = strtoul(value, NULL, 16);
202       }
203 
204       snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
205 	       domain, pcidev->bus, pcidev->dev, pcidev->func);
206       file = fopen(path, "r");
207       if (file) {
208 	bytes_read = fread(value, 1, sizeof(value), file);
209 	fclose(file);
210 	if (bytes_read)
211 	  /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */
212           pcidev->device_id = strtoul(value, NULL, 16);
213       }
214 #endif
215     }
216 
217     /* might be useful for debugging (note that domain might be truncated) */
218     os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func;
219 
220     obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index);
221     obj->attr->pcidev.domain = domain;
222     obj->attr->pcidev.bus = pcidev->bus;
223     obj->attr->pcidev.dev = pcidev->dev;
224     obj->attr->pcidev.func = pcidev->func;
225     obj->attr->pcidev.vendor_id = pcidev->vendor_id;
226     obj->attr->pcidev.device_id = pcidev->device_id;
227     obj->attr->pcidev.class_id = device_class;
228     obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];
229 
230     obj->attr->pcidev.linkspeed = 0; /* unknown */
231     offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP);
232 
233     if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) {
234       hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);
235 #ifdef HWLOC_LINUX_SYS
236     } else {
237       /* if not available from config-space (extended part is root-only), look in Linux sysfs files added in 4.13 */
238       char path[64];
239       char value[16];
240       FILE *file;
241       size_t bytes_read;
242       float speed = 0.f;
243       unsigned width = 0;
244       snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/current_link_speed",
245 	       domain, pcidev->bus, pcidev->dev, pcidev->func);
246       file = fopen(path, "r");
247       if (file) {
248 	bytes_read = fread(value, 1, sizeof(value), file);
249 	fclose(file);
250 	if (bytes_read)
251 	  speed = hwloc_linux_pci_link_speed_from_string(value);
252       }
253       snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/current_link_width",
254 	       domain, pcidev->bus, pcidev->dev, pcidev->func);
255       file = fopen(path, "r");
256       if (file) {
257 	bytes_read = fread(value, 1, sizeof(value), file);
258 	fclose(file);
259 	if (bytes_read)
260 	  width = atoi(value);
261       }
262       obj->attr->pcidev.linkspeed = speed*width/8;
263 #endif
264     }
265 
266     if (hwloc_pci_prepare_bridge(obj, config_space_cache) < 0)
267       continue;
268 
269     if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
270       memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16));
271       obj->attr->pcidev.subvendor_id = tmp16;
272       memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16));
273       obj->attr->pcidev.subdevice_id = tmp16;
274     } else {
275       /* TODO:
276        * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID
277        * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID
278        */
279     }
280 
281     /* get the vendor name */
282     vendorname = pci_device_get_vendor_name(pcidev);
283     if (vendorname && *vendorname)
284       hwloc_obj_add_info(obj, "PCIVendor", vendorname);
285 
286     /* get the device name */
287     devicename = pci_device_get_device_name(pcidev);
288     if (devicename && *devicename)
289       hwloc_obj_add_info(obj, "PCIDevice", devicename);
290 
291     /* generate or get the fullname */
292     snprintf(name, sizeof(name), "%s%s%s",
293 	     vendorname ? vendorname : "",
294 	     vendorname && devicename ? " " : "",
295 	     devicename ? devicename : "");
296     fullname = name;
297     if (*name)
298       obj->name = strdup(name);
299     hwloc_debug("  %04x:%02x:%02x.%01x %04x %04x:%04x %s\n",
300 		domain, pcidev->bus, pcidev->dev, pcidev->func,
301 		device_class, pcidev->vendor_id, pcidev->device_id,
302 		fullname && *fullname ? fullname : "??");
303 
304     /* queue the object for now */
305     if (first_obj)
306       last_obj->next_sibling = obj;
307     else
308       first_obj = obj;
309     last_obj = obj;
310   }
311 
312   /* finalize device scanning */
313   pci_iterator_destroy(iter);
314   pci_system_cleanup();
315   HWLOC_PCIACCESS_UNLOCK();
316 
317 #ifdef HWLOC_LINUX_SYS
318   dir = opendir("/sys/bus/pci/slots/");
319   if (dir) {
320     struct dirent *dirent;
321     while ((dirent = readdir(dir)) != NULL) {
322       char path[64];
323       FILE *file;
324       int err;
325       if (dirent->d_name[0] == '.')
326 	continue;
327       err = snprintf(path, sizeof(path), "/sys/bus/pci/slots/%s/address", dirent->d_name);
328       if ((size_t) err < sizeof(path)) {
329 	file = fopen(path, "r");
330 	if (file) {
331 	  unsigned domain, bus, dev;
332 	  if (fscanf(file, "%x:%x:%x", &domain, &bus, &dev) == 3) {
333 	    hwloc_obj_t obj = first_obj;
334 	    while (obj) {
335 	      if (obj->attr->pcidev.domain == domain
336 		  && obj->attr->pcidev.bus == bus
337 		  && obj->attr->pcidev.dev == dev) {
338 		hwloc_obj_add_info(obj, "PCISlot", dirent->d_name);
339 	      }
340 	      obj = obj->next_sibling;
341 	    }
342 	  }
343 	  fclose(file);
344 	}
345       }
346     }
347     closedir(dir);
348   }
349 #endif
350 
351   return hwloc_insert_pci_device_list(backend, first_obj);
352 }
353 
354 static struct hwloc_backend *
hwloc_pci_component_instantiate(struct hwloc_disc_component * component,const void * _data1 __hwloc_attribute_unused,const void * _data2 __hwloc_attribute_unused,const void * _data3 __hwloc_attribute_unused)355 hwloc_pci_component_instantiate(struct hwloc_disc_component *component,
356 				   const void *_data1 __hwloc_attribute_unused,
357 				   const void *_data2 __hwloc_attribute_unused,
358 				   const void *_data3 __hwloc_attribute_unused)
359 {
360   struct hwloc_backend *backend;
361 
362   /* thissystem may not be fully initialized yet, we'll check flags in discover() */
363 
364   backend = hwloc_backend_alloc(component);
365   if (!backend)
366     return NULL;
367   backend->flags = HWLOC_BACKEND_FLAG_NEED_LEVELS;
368 #ifdef HWLOC_SOLARIS_SYS
369   if ((uid_t)0 != geteuid())
370     backend->discover = NULL;
371   else
372 #endif
373     backend->discover = hwloc_look_pci;
374   return backend;
375 }
376 
377 static struct hwloc_disc_component hwloc_pci_disc_component = {
378   HWLOC_DISC_COMPONENT_TYPE_MISC,
379   "pci",
380   HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
381   hwloc_pci_component_instantiate,
382   20,
383   NULL
384 };
385 
/*
 * Init callback of the pci component.
 *
 * Returns -1 if any flag is given or if the plugin namespace check fails,
 * 0 otherwise. The namespace check is not performed when flags is non-zero.
 */
static int
hwloc_pci_component_init(unsigned long flags)
{
  if (flags != 0
      || hwloc_plugin_check_namespace("pci", "hwloc_backend_alloc") < 0)
    return -1;

  return 0;
}
395 
396 #ifdef HWLOC_INSIDE_PLUGIN
397 HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;
398 #endif
399 
400 const struct hwloc_component hwloc_pci_component = {
401   HWLOC_COMPONENT_ABI,
402   hwloc_pci_component_init, NULL,
403   HWLOC_COMPONENT_TYPE_DISC,
404   0,
405   &hwloc_pci_disc_component
406 };
407