1 /*
2  * Copyright © 2012-2017 Inria.  All rights reserved.
3  * Copyright © 2013, 2018 Université Bordeaux.  All right reserved.
4  * See COPYING in top-level directory.
5  */
6 
7 /** \file
8  * \brief Macros to help interaction between hwloc and the OpenCL interface.
9  *
10  * Applications that use both hwloc and OpenCL may want to
11  * include this file so as to get topology information for OpenCL devices.
12  */
13 
14 #ifndef HWLOC_OPENCL_H
15 #define HWLOC_OPENCL_H
16 
17 #include <hwloc.h>
18 #include <hwloc/autogen/config.h>
19 #include <hwloc/helper.h>
20 #ifdef HWLOC_LINUX_SYS
21 #include <hwloc/linux.h>
22 #endif
23 
24 #ifdef __APPLE__
25 #include <OpenCL/cl.h>
26 #include <OpenCL/cl_ext.h>
27 #else
28 #include <CL/cl.h>
29 #include <CL/cl_ext.h>
30 #endif
31 
32 #include <stdio.h>
33 
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 
39 
40 /** \defgroup hwlocality_opencl Interoperability with OpenCL
41  *
42  * This interface offers ways to retrieve topology information about
43  * OpenCL devices.
44  *
45  * Only the AMD OpenCL interface currently offers useful locality information
46  * about its devices.
47  *
48  * @{
49  */
50 
51 /** \brief Get the CPU set of logical processors that are physically
52  * close to OpenCL device \p device.
53  *
54  * Return the CPU set describing the locality of the OpenCL device \p device.
55  *
56  * Topology \p topology and device \p device must match the local machine.
57  * I/O devices detection and the OpenCL component are not needed in the topology.
58  *
59  * The function only returns the locality of the device.
60  * If more information about the device is needed, OS objects should
61  * be used instead, see hwloc_opencl_get_device_osdev()
62  * and hwloc_opencl_get_device_osdev_by_index().
63  *
64  * This function is currently only implemented in a meaningful way for
65  * Linux with the AMD OpenCL implementation; other systems will simply
66  * get a full cpuset.
67  */
68 static __hwloc_inline int
hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,cl_device_id device __hwloc_attribute_unused,hwloc_cpuset_t set)69 hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
70 			       cl_device_id device __hwloc_attribute_unused,
71 			       hwloc_cpuset_t set)
72 {
73 #if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD)
74 	/* If we're on Linux + AMD OpenCL, use the AMD extension + the sysfs mechanism to get the local cpus */
75 #define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128
76 	char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX];
77 	FILE *sysfile = NULL;
78 	cl_device_topology_amd amdtopo;
79 	cl_int clret;
80 
81 	if (!hwloc_topology_is_thissystem(topology)) {
82 		errno = EINVAL;
83 		return -1;
84 	}
85 
86 	clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
87 	if (CL_SUCCESS != clret) {
88 		hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
89 		return 0;
90 	}
91 	if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
92 		hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
93 		return 0;
94 	}
95 
96 	sprintf(path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus",
97 		(unsigned) amdtopo.pcie.bus, (unsigned) amdtopo.pcie.device, (unsigned) amdtopo.pcie.function);
98 	sysfile = fopen(path, "r");
99 	if (!sysfile)
100 		return -1;
101 
102 	if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0
103 	    || hwloc_bitmap_iszero(set))
104 		hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
105 
106 	fclose(sysfile);
107 #else
108 	/* Non-Linux + AMD OpenCL systems simply get a full cpuset */
109 	hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
110 #endif
111   return 0;
112 }
113 
114 /** \brief Get the hwloc OS device object corresponding to the
115  * OpenCL device for the given indexes.
116  *
117  * Return the OS device object describing the OpenCL device
118  * whose platform index is \p platform_index,
119  * and whose device index within this platform if \p device_index.
120  * Return NULL if there is none.
121  *
122  * The topology \p topology does not necessarily have to match the current
123  * machine. For instance the topology may be an XML import of a remote host.
124  * I/O devices detection and the OpenCL component must be enabled in the topology.
125  *
126  * \note The corresponding PCI device object can be obtained by looking
127  * at the OS device parent object.
128  */
129 static __hwloc_inline hwloc_obj_t
hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology,unsigned platform_index,unsigned device_index)130 hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology,
131 				       unsigned platform_index, unsigned device_index)
132 {
133 	unsigned x = (unsigned) -1, y = (unsigned) -1;
134 	hwloc_obj_t osdev = NULL;
135 	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
136 		if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
137                     && osdev->name
138 		    && sscanf(osdev->name, "opencl%ud%u", &x, &y) == 2
139 		    && platform_index == x && device_index == y)
140                         return osdev;
141         }
142         return NULL;
143 }
144 
145 /** \brief Get the hwloc OS device object corresponding to OpenCL device \p device.
146  *
147  * Return the hwloc OS device object that describes the given
148  * OpenCL device \p device. Return NULL if there is none.
149  *
150  * Topology \p topology and device \p device must match the local machine.
151  * I/O devices detection and the OpenCL component must be enabled in the topology.
152  * If not, the locality of the object may still be found using
153  * hwloc_opencl_get_device_cpuset().
154  *
155  * \note The corresponding hwloc PCI device may be found by looking
156  * at the result parent pointer.
157  */
158 static __hwloc_inline hwloc_obj_t
hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused,cl_device_id device __hwloc_attribute_unused)159 hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused,
160 			      cl_device_id device __hwloc_attribute_unused)
161 {
162 #ifdef CL_DEVICE_TOPOLOGY_AMD
163 	hwloc_obj_t osdev;
164 	cl_device_topology_amd amdtopo;
165 	cl_int clret;
166 
167 	clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
168 	if (CL_SUCCESS != clret) {
169 		errno = EINVAL;
170 		return NULL;
171 	}
172 	if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
173 		errno = EINVAL;
174 		return NULL;
175 	}
176 
177 	osdev = NULL;
178 	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
179 		hwloc_obj_t pcidev = osdev->parent;
180 		if (strncmp(osdev->name, "opencl", 6))
181 			continue;
182 		if (pcidev
183 		    && pcidev->type == HWLOC_OBJ_PCI_DEVICE
184 		    && pcidev->attr->pcidev.domain == 0
185 		    && pcidev->attr->pcidev.bus == amdtopo.pcie.bus
186 		    && pcidev->attr->pcidev.dev == amdtopo.pcie.device
187 		    && pcidev->attr->pcidev.func == amdtopo.pcie.function)
188 			return osdev;
189 	}
190 
191 	return NULL;
192 #else
193 	return NULL;
194 #endif
195 }
196 
197 /** @} */
198 
199 
200 #ifdef __cplusplus
201 } /* extern "C" */
202 #endif
203 
204 
205 #endif /* HWLOC_OPENCL_H */
206