1 /*
2  * Copyright © 2010-2020 Inria.  All rights reserved.
3  * Copyright © 2010-2011 Université Bordeaux
4  * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
5  * See COPYING in top-level directory.
6  */
7 
8 /** \file
 * \brief Inline helper functions for interoperability between hwloc and the CUDA Driver API.
10  *
11  * Applications that use both hwloc and the CUDA Driver API may want to
12  * include this file so as to get topology information for CUDA devices.
13  *
14  */
15 
16 #ifndef HWLOC_CUDA_H
17 #define HWLOC_CUDA_H
18 
19 #include "hwloc.h"
20 #include "hwloc/autogen/config.h"
21 #include "hwloc/helper.h"
22 #ifdef HWLOC_LINUX_SYS
23 #include "hwloc/linux.h"
24 #endif
25 
26 #include <cuda.h>
27 
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 
34 /** \defgroup hwlocality_cuda Interoperability with the CUDA Driver API
35  *
36  * This interface offers ways to retrieve topology information about
37  * CUDA devices when using the CUDA Driver API.
38  *
39  * @{
40  */
41 
42 /** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
43  *
44  * Device \p cudevice must match the local machine.
45  */
46 static __hwloc_inline int
hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,CUdevice cudevice,int * domain,int * bus,int * dev)47 hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
48 			      CUdevice cudevice, int *domain, int *bus, int *dev)
49 {
50   CUresult cres;
51 
52 #if CUDA_VERSION >= 4000
53   cres = cuDeviceGetAttribute(domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice);
54   if (cres != CUDA_SUCCESS) {
55     errno = ENOSYS;
56     return -1;
57   }
58 #else
59   *domain = 0;
60 #endif
61   cres = cuDeviceGetAttribute(bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice);
62   if (cres != CUDA_SUCCESS) {
63     errno = ENOSYS;
64     return -1;
65   }
66   cres = cuDeviceGetAttribute(dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice);
67   if (cres != CUDA_SUCCESS) {
68     errno = ENOSYS;
69     return -1;
70   }
71 
72   return 0;
73 }
74 
75 /** \brief Get the CPU set of processors that are physically
76  * close to device \p cudevice.
77  *
78  * Return the CPU set describing the locality of the CUDA device \p cudevice.
79  *
80  * Topology \p topology and device \p cudevice must match the local machine.
81  * I/O devices detection and the CUDA component are not needed in the topology.
82  *
83  * The function only returns the locality of the device.
84  * If more information about the device is needed, OS objects should
85  * be used instead, see hwloc_cuda_get_device_osdev()
86  * and hwloc_cuda_get_device_osdev_by_index().
87  *
88  * This function is currently only implemented in a meaningful way for
89  * Linux; other systems will simply get a full cpuset.
90  */
91 static __hwloc_inline int
hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,CUdevice cudevice,hwloc_cpuset_t set)92 hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
93 			     CUdevice cudevice, hwloc_cpuset_t set)
94 {
95 #ifdef HWLOC_LINUX_SYS
96   /* If we're on Linux, use the sysfs mechanism to get the local cpus */
97 #define HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX 128
98   char path[HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX];
99   int domainid, busid, deviceid;
100 
101   if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domainid, &busid, &deviceid))
102     return -1;
103 
104   if (!hwloc_topology_is_thissystem(topology)) {
105     errno = EINVAL;
106     return -1;
107   }
108 
109   sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domainid, busid, deviceid);
110   if (hwloc_linux_read_path_as_cpumask(path, set) < 0
111       || hwloc_bitmap_iszero(set))
112     hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
113 #else
114   /* Non-Linux systems simply get a full cpuset */
115   hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
116 #endif
117   return 0;
118 }
119 
120 /** \brief Get the hwloc PCI device object corresponding to the
121  * CUDA device \p cudevice.
122  *
123  * Return the PCI device object describing the CUDA device \p cudevice.
124  * Return NULL if there is none.
125  *
126  * Topology \p topology and device \p cudevice must match the local machine.
127  * I/O devices detection must be enabled in topology \p topology.
128  * The CUDA component is not needed in the topology.
129  */
130 static __hwloc_inline hwloc_obj_t
hwloc_cuda_get_device_pcidev(hwloc_topology_t topology,CUdevice cudevice)131 hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice)
132 {
133   int domain, bus, dev;
134 
135   if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
136     return NULL;
137 
138   return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0);
139 }
140 
141 /** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice.
142  *
143  * Return the hwloc OS device object that describes the given
144  * CUDA device \p cudevice. Return NULL if there is none.
145  *
146  * Topology \p topology and device \p cudevice must match the local machine.
147  * I/O devices detection and the CUDA component must be enabled in the topology.
148  * If not, the locality of the object may still be found using
149  * hwloc_cuda_get_device_cpuset().
150  *
151  * \note This function cannot work if PCI devices are filtered out.
152  *
153  * \note The corresponding hwloc PCI device may be found by looking
154  * at the result parent pointer (unless PCI devices are filtered out).
155  */
156 static __hwloc_inline hwloc_obj_t
hwloc_cuda_get_device_osdev(hwloc_topology_t topology,CUdevice cudevice)157 hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice)
158 {
159 	hwloc_obj_t osdev = NULL;
160 	int domain, bus, dev;
161 
162 	if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
163 		return NULL;
164 
165 	osdev = NULL;
166 	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
167 		hwloc_obj_t pcidev = osdev->parent;
168 		if (strncmp(osdev->name, "cuda", 4))
169 			continue;
170 		if (pcidev
171 		    && pcidev->type == HWLOC_OBJ_PCI_DEVICE
172 		    && (int) pcidev->attr->pcidev.domain == domain
173 		    && (int) pcidev->attr->pcidev.bus == bus
174 		    && (int) pcidev->attr->pcidev.dev == dev
175 		    && pcidev->attr->pcidev.func == 0)
176 			return osdev;
177 		/* if PCI are filtered out, we need a info attr to match on */
178 	}
179 
180 	return NULL;
181 }
182 
183 /** \brief Get the hwloc OS device object corresponding to the
184  * CUDA device whose index is \p idx.
185  *
186  * Return the OS device object describing the CUDA device whose
187  * index is \p idx. Return NULL if there is none.
188  *
189  * The topology \p topology does not necessarily have to match the current
190  * machine. For instance the topology may be an XML import of a remote host.
191  * I/O devices detection and the CUDA component must be enabled in the topology.
192  *
193  * \note The corresponding PCI device object can be obtained by looking
194  * at the OS device parent object (unless PCI devices are filtered out).
195  *
196  * \note This function is identical to hwloc_cudart_get_device_osdev_by_index().
197  */
198 static __hwloc_inline hwloc_obj_t
hwloc_cuda_get_device_osdev_by_index(hwloc_topology_t topology,unsigned idx)199 hwloc_cuda_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
200 {
201 	hwloc_obj_t osdev = NULL;
202 	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
203 		if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
204 		    && osdev->name
205 		    && !strncmp("cuda", osdev->name, 4)
206 		    && atoi(osdev->name + 4) == (int) idx)
207 			return osdev;
208 	}
209 	return NULL;
210 }
211 
212 /** @} */
213 
214 
215 #ifdef __cplusplus
216 } /* extern "C" */
217 #endif
218 
219 
220 #endif /* HWLOC_CUDA_H */
221