1 /*
2  * Copyright © 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including
13  * the next paragraph) shall be included in all copies or substantial
14  * portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  */
25 
26 #define _GNU_SOURCE
27 #include <assert.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <dirent.h>
31 #if !defined(__FreeBSD__) && !defined(__DragonFly__)
32 #include <malloc.h>
33 #endif
34 #include <string.h>
35 #include <unistd.h>
36 #include <ctype.h>
37 #include <sched.h>
38 #include <pci/pci.h>
39 #ifdef __FreeBSD__
40 #include <sys/cpuset.h>
41 #endif
42 
43 #include "libhsakmt.h"
44 #include "fmm.h"
45 
46 /* Number of memory banks added by thunk on top of topology */
47 #define NUM_OF_IGPU_HEAPS 3
48 #define NUM_OF_DGPU_HEAPS 3
49 /* SYSFS related */
50 #define KFD_SYSFS_PATH_GENERATION_ID "/sys/devices/virtual/kfd/kfd/topology/generation_id"
51 #define KFD_SYSFS_PATH_SYSTEM_PROPERTIES "/sys/devices/virtual/kfd/kfd/topology/system_properties"
52 #define KFD_SYSFS_PATH_NODES "/sys/devices/virtual/kfd/kfd/topology/nodes"
53 #define PROC_CPUINFO_PATH "/proc/cpuinfo"
54 
55 typedef struct {
56 	uint32_t gpu_id;
57 	HsaNodeProperties node;
58 	HsaMemoryProperties *mem;     /* node->NumBanks elements */
59 	HsaCacheProperties *cache;
60 	HsaIoLinkProperties *link;
61 } node_t;
62 
63 static HsaSystemProperties *_system = NULL;
64 static node_t *node = NULL;
65 static int is_valgrind;
66 
67 static int processor_vendor;
68 /* Supported processor vendors */
69 enum SUPPORTED_PROCESSOR_VENDORS {
70 	GENUINE_INTEL = 0,
71 	AUTHENTIC_AMD
72 };
73 /* Trailing newline makes the match against /proc/cpuinfo lines exact */
74 static const char *supported_processor_vendor_name[] = {
75 	"GenuineIntel\n",
76 	"AuthenticAMD\n"
77 };
78 
79 static HSAKMT_STATUS topology_take_snapshot(void);
80 static HSAKMT_STATUS topology_drop_snapshot(void);
81 
82 static struct hsa_gfxip_table {
83 	uint16_t device_id;		// Device ID
84 	unsigned char major;		// GFXIP Major engine version
85 	unsigned char minor;		// GFXIP Minor engine version
86 	unsigned char stepping;		// GFXIP Stepping info
87 	unsigned char is_dgpu;		// Predicate for dGPU devices
88 	const char *amd_name;		// CALName of the device
89 	enum asic_family_type asic_family;
90 } gfxip_lookup_table[] = {
91 	/* Kaveri Family */
92 	{ 0x1304, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
93 	{ 0x1305, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
94 	{ 0x1306, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
95 	{ 0x1307, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
96 	{ 0x1309, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
97 	{ 0x130A, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
98 	{ 0x130B, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
99 	{ 0x130C, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
100 	{ 0x130D, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
101 	{ 0x130E, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
102 	{ 0x130F, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
103 	{ 0x1310, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
104 	{ 0x1311, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
105 	{ 0x1312, 7, 0, 0, 0, "Spooky", CHIP_KAVERI },
106 	{ 0x1313, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
107 	{ 0x1315, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
108 	{ 0x1316, 7, 0, 0, 0, "Spooky", CHIP_KAVERI },
109 	{ 0x1317, 7, 0, 0, 0, "Spooky", CHIP_KAVERI },
110 	{ 0x1318, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
111 	{ 0x131B, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
112 	{ 0x131C, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
113 	{ 0x131D, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
114 	/* Hawaii Family */
115 	{ 0x67A0, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
116 	{ 0x67A1, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
117 	{ 0x67A2, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
118 	{ 0x67A8, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
119 	{ 0x67A9, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
120 	{ 0x67AA, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
121 	{ 0x67B0, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
122 	{ 0x67B1, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
123 	{ 0x67B8, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
124 	{ 0x67B9, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
125 	{ 0x67BA, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
126 	{ 0x67BE, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
127 	/* Carrizo Family */
128 	{ 0x9870, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
129 	{ 0x9874, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
130 	{ 0x9875, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
131 	{ 0x9876, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
132 	{ 0x9877, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
133 	/* Tonga Family */
134 	{ 0x6920, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
135 	{ 0x6921, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
136 	{ 0x6928, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
137 	{ 0x6929, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
138 	{ 0x692B, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
139 	{ 0x692F, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
140 	{ 0x6930, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
141 	{ 0x6938, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
142 	{ 0x6939, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
143 	/* Fiji */
144 	{ 0x7300, 8, 0, 3, 1, "Fiji", CHIP_FIJI },
145 	{ 0x730F, 8, 0, 3, 1, "Fiji", CHIP_FIJI },
146 	/* Polaris10 */
147 	{ 0x67C0, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
148 	{ 0x67C1, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
149 	{ 0x67C2, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
150 	{ 0x67C4, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
151 	{ 0x67C7, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
152 	{ 0x67C8, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
153 	{ 0x67C9, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
154 	{ 0x67CA, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
155 	{ 0x67CC, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
156 	{ 0x67CF, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
157 	{ 0x67D0, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
158 	{ 0x67DF, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
159 	/* Polaris11 */
160 	{ 0x67E0, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
161 	{ 0x67E1, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
162 	{ 0x67E3, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
163 	{ 0x67E7, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
164 	{ 0x67E8, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
165 	{ 0x67E9, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
166 	{ 0x67EB, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
167 	{ 0x67EF, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
168 	{ 0x67FF, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
169 	/* Vega10 */
170 	{ 0x6860, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
171 	{ 0x6861, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
172 	{ 0x6862, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
173 	{ 0x6863, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
174 	{ 0x6864, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
175 	{ 0x6867, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
176 	{ 0x6868, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
177 	{ 0x686C, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
178 	{ 0x687F, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
179 	/* Vega12 */
180 	{ 0x69A0, 9, 0, 4, 1, "Vega12", CHIP_VEGA10 },
181 	{ 0x69A1, 9, 0, 4, 1, "Vega12", CHIP_VEGA10 },
182 	{ 0x69A3, 9, 0, 4, 1, "Vega12", CHIP_VEGA10 },
183 	{ 0x69AF, 9, 0, 4, 1, "Vega12", CHIP_VEGA10 },
184 	/* Raven */
185 	{ 0x15DD, 9, 0, 2, 0, "Raven", CHIP_RAVEN },
186 	/* Vega20 */
187 	{ 0x66A0, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
188 	{ 0x66A1, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
189 	{ 0x66A2, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
190 	{ 0x66A3, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
191 	{ 0x66A7, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
192 	{ 0x66AF, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
193 };
194 
195 enum cache_type {
196 	CACHE_TYPE_NULL = 0,
197 	CACHE_TYPE_DATA = 1,
198 	CACHE_TYPE_INST = 2,
199 	CACHE_TYPE_UNIFIED = 3
200 };
201 
202 typedef struct cacheinfo {
203 	HsaCacheProperties hsa_cache_prop;
204 	uint32_t num_threads_sharing; /* how many CPUs share this cache */
205 } cacheinfo_t;
206 
207 /* CPU cache table for all CPUs on the system. Each entry holds one CPU's
208  * info and the caches connected to that CPU.
209  */
210 typedef struct cpu_cacheinfo {
211 	uint32_t len; /* length of the table -> number of online procs */
212 	uint32_t num_caches; /* number of caches connected to this cpu */
213 	uint32_t num_duplicated_caches; /* to count caches being shared */
214 	uint32_t apicid; /* this cpu's apic id */
215 	uint32_t max_num_apicid; /* max number of addressable IDs */
216 	cacheinfo_t *cache_info; /* an array for cache information */
217 } cpu_cacheinfo_t;
218 
219 /* Deterministic Cache Parameters Leaf in cpuid */
220 union _cpuid_leaf_eax { /* Register EAX */
221 	struct {
222 		enum cache_type	type:5;
223 		uint32_t	level:3;
224 		uint32_t	is_self_initializing:1;
225 		uint32_t	is_fully_associative:1;
226 		uint32_t	reserved:4;
227 		uint32_t	num_threads_sharing:12;
228 		uint32_t	num_cores_on_die:6;
229 	} split;
230 	uint32_t full;
231 };
232 
233 union _cpuid_leaf_ebx { /* Register EBX */
234 	struct {
235 		uint32_t	coherency_line_size:12;
236 		uint32_t	physical_line_partition:10;
237 		uint32_t	ways_of_associativity:10;
238 	} split;
239 	uint32_t full;
240 };
241 
242 static void
243 free_node(node_t *n)
244 {
245 	assert(n);
246 
247 	if (!n)
248 		return;
249 
250 	if ((n)->mem)
251 		free((n)->mem);
252 	if ((n)->cache)
253 		free((n)->cache);
254 	if ((n)->link)
255 		free((n)->link);
256 }
257 
258 static void free_nodes(node_t *temp_nodes, int size)
259 {
260 	int i;
261 
262 	if (temp_nodes) {
263 		for (i = 0; i < size; i++)
264 			free_node(&temp_nodes[i]);
265 		free(temp_nodes);
266 	}
267 }
268 
269 /* num_subdirs - find the number of sub-directories in the specified path
270  *	@dirpath - directory path to find sub-directories underneath
271  *	@prefix - only count sub-directory names starting with prefix.
272  *		Use blank string, "", to count all.
273  *	Return - number of sub-directories
274  */
275 static int num_subdirs(char *dirpath, char *prefix)
276 {
277 	int count = 0;
278 	DIR *dirp;
279 	struct dirent *dir;
280 	int prefix_len = strlen(prefix);
281 
282 	dirp = opendir(dirpath);
283 	if (dirp) {
284 		while ((dir = readdir(dirp)) != 0) {
285 			if ((strcmp(dir->d_name, ".") == 0) ||
286 				(strcmp(dir->d_name, "..") == 0))
287 				continue;
288 			if (prefix_len &&
289 				strncmp(dir->d_name, prefix, prefix_len))
290 				continue;
291 			count++;
292 		}
293 		closedir(dirp);
294 	}
295 
296 	return count;
297 }
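/* Illustrative usage sketch (comment only, not compiled): counting KFD
 * topology nodes with num_subdirs(). The second call and its prefix are
 * purely hypothetical.
 *
 *	int num_nodes = num_subdirs(KFD_SYSFS_PATH_NODES, "");   // all entries
 *	int matching  = num_subdirs("/tmp/example", "node");     // only "node*"
 *
 * An empty prefix counts every subdirectory except "." and "..".
 */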
298 
299 #if defined(__x86_64__) || defined(__i386__)
300 /* cpuid instruction returns processor identification and feature information
301  * to the EAX, EBX, ECX, and EDX registers, as determined by input entered in
302  * EAX (in some cases, ECX as well).
303  */
304 static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
305 			 uint32_t *edx)
306 {
307 	__asm__ __volatile__(
308 		"cpuid;"
309 		: "=a" (*eax),
310 		  "=b" (*ebx),
311 		  "=c" (*ecx),
312 		  "=d" (*edx)
313 		: "0" (*eax), "2" (*ecx)
314 		: "memory"
315 	);
316 }
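/* Minimal usage sketch (comment only): reading the initial APIC ID via
 * CPUID leaf 0x1, mirroring how topology_create_temp_cpu_cache_list()
 * below uses this wrapper.
 *
 *	uint32_t eax = 0x1, ebx, ecx = 0, edx;
 *	cpuid(&eax, &ebx, &ecx, &edx);
 *	uint32_t apicid  = (ebx >> 24) & 0xff;  // initial APIC ID
 *	uint32_t max_ids = (ebx >> 16) & 0xff;  // max addressable IDs per package
 */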
317 
318 /* For cases where ECX is also an input to cpuid, e.g. the cache leaf */
319 static void cpuid_count(uint32_t op, int count, uint32_t *eax, uint32_t *ebx,
320 			uint32_t *ecx, uint32_t *edx)
321 {
322 	*eax = op;
323 	*ecx = count;
324 	cpuid(eax, ebx, ecx, edx);
325 }
326 
327 /* Lock current process to the specified processor */
328 static int lock_to_processor(int processor)
329 {
330 #ifdef __FreeBSD__
331 	cpuset_t set;
332 
333 	CPU_ZERO(&set);
334 	CPU_SET(processor, &set);
335 	return cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(set), &set);
336 #else
337 	cpu_set_t cpuset;
338 
339 	memset(&cpuset, 0, sizeof(cpu_set_t));
340 	CPU_SET(processor, &cpuset);
341 	/* 0: this process */
342 	return sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
343 #endif
344 }
345 
346 /* Get the base-2 order of count, i.e. 2^return_value == count.
347  * When count is not a power of 2, round up to the next order.
348  */
349 static int get_count_order(unsigned int count)
350 {
351 	int bit;
352 	uint32_t num;
353 
354 	for (bit = 31; bit >= 0; bit--) {
355 		num = 1 << bit;
356 		if (count >= num)
357 			break;
358 	}
359 	if (count & (count - 1))
360 		++bit;
361 
362 	return bit;
363 }
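/* Worked examples for get_count_order():
 *
 *	get_count_order(1) == 0    // 2^0 == 1
 *	get_count_order(2) == 1    // 2^1 == 2
 *	get_count_order(3) == 2    // not a power of 2, rounded up
 *	get_count_order(8) == 3    // 2^3 == 8
 *
 * find_cpu_cache_siblings() uses this to turn num_threads_sharing into the
 * number of low APIC-ID bits to strip when comparing threads that share a
 * cache.
 */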
364 
365 /* cpuid_find_num_cache_leaves - Use cpuid instruction to find out how many
366  *		cache leaves the CPU has.
367  *	@op - cpuid opcode to get cache information
368  *	Return - the number of cache leaves
369  */
370 static int cpuid_find_num_cache_leaves(uint32_t op)
371 {
372 	union _cpuid_leaf_eax eax;
373 	union _cpuid_leaf_ebx ebx;
374 	unsigned int ecx;
375 	unsigned int edx;
376 	int idx = -1;
377 
378 	do {
379 		++idx;
380 		cpuid_count(op, idx, &eax.full, &ebx.full, &ecx, &edx);
381 		/* Modern systems have cache levels up to 3. */
382 	} while (eax.split.type != CACHE_TYPE_NULL && idx < 4);
383 	return idx;
384 }
385 
386 /* cpuid_get_cpu_cache_info - Use cpuid instruction to get cache information
387  *	@op - cpuid opcode to get cache information
388  *	@cpu_ci - this parameter is an input and also an output.
389  *		  [IN] cpu_ci->num_caches: the number of caches of this cpu
390  *		  [OUT] cpu_ci->cache_info: to store cache info collected
391  */
392 static void cpuid_get_cpu_cache_info(uint32_t op, cpu_cacheinfo_t *cpu_ci)
393 {
394 	union _cpuid_leaf_eax eax;
395 	union _cpuid_leaf_ebx ebx;
396 	uint32_t ecx;
397 	uint32_t edx;
398 	uint32_t index;
399 	cacheinfo_t *this_leaf;
400 
401 	for (index = 0; index < cpu_ci->num_caches; index++) {
402 		cpuid_count(op, index, &eax.full, &ebx.full, &ecx, &edx);
403 		this_leaf = cpu_ci->cache_info + index;
404 		this_leaf->hsa_cache_prop.ProcessorIdLow = cpu_ci->apicid;
405 		this_leaf->num_threads_sharing =
406 				eax.split.num_threads_sharing + 1;
407 		this_leaf->hsa_cache_prop.CacheLevel = eax.split.level;
408 		this_leaf->hsa_cache_prop.CacheType.ui32.CPU = 1;
409 		if (eax.split.type & CACHE_TYPE_DATA)
410 			this_leaf->hsa_cache_prop.CacheType.ui32.Data = 1;
411 		if (eax.split.type & CACHE_TYPE_INST)
412 			this_leaf->hsa_cache_prop.CacheType.ui32.Instruction = 1;
413 		this_leaf->hsa_cache_prop.CacheLineSize =
414 				ebx.split.coherency_line_size + 1;
415 		this_leaf->hsa_cache_prop.CacheAssociativity =
416 				ebx.split.ways_of_associativity + 1;
417 		this_leaf->hsa_cache_prop.CacheLinesPerTag =
418 				ebx.split.physical_line_partition + 1;
419 		this_leaf->hsa_cache_prop.CacheSize = (ecx + 1) *
420 				(ebx.split.coherency_line_size	   + 1) *
421 				(ebx.split.physical_line_partition + 1) *
422 				(ebx.split.ways_of_associativity   + 1);
423 	}
424 }
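/* Worked example of the CacheSize arithmetic above, using hypothetical
 * register values for a 32 KB, 8-way cache with 64-byte lines:
 *
 *	ways_of_associativity   = 7    ->  8 ways
 *	physical_line_partition = 0    ->  1 partition
 *	coherency_line_size     = 63   -> 64-byte lines
 *	ecx (sets - 1)          = 63   -> 64 sets
 *
 *	CacheSize = 64 * 64 * 1 * 8 = 32768 bytes
 *
 * Every "+ 1" comes from the CPUID encoding, which stores each field as
 * (value - 1).
 */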
425 
426 /* find_cpu_cache_siblings - In the cache list, some caches may be listed more
427  *	than once if they are shared by multiple CPUs. Identify the cache's CPU
428  *	siblings, record them in SiblingMap[], then remove the duplicated cache by
429  *	changing the cache size to 0.
430  */
431 static void find_cpu_cache_siblings(cpu_cacheinfo_t *cpu_ci_list)
432 {
433 	cacheinfo_t *this_leaf, *leaf2;
434 	uint32_t n, j, idx_msb, apicid1, apicid2;
435 	cpu_cacheinfo_t *this_cpu, *cpu2;
436 	uint32_t index;
437 
438 	/* FixMe: cpuid under Valgrind doesn't return data from the processor we set
439 	 * affinity to. We can't use that data to calculate siblings.
440 	 */
441 	if (is_valgrind)
442 		return;
443 
444 	for (n = 0; n < cpu_ci_list->len; n++) {
445 		this_cpu = cpu_ci_list + n;
446 		for (index = 0; index < this_cpu->num_caches; index++) {
447 			this_leaf = this_cpu->cache_info + index;
448 			/* CacheSize 0 means an invalid cache */
449 			if (!this_leaf->hsa_cache_prop.CacheSize)
450 				continue;
451 			if (this_leaf->num_threads_sharing == 1) // no siblings
452 				continue;
453 			idx_msb = get_count_order(this_leaf->num_threads_sharing);
454 			for (j = n + 1; j < cpu_ci_list->len; j++) {
455 				cpu2 = cpu_ci_list + j;
456 				leaf2 = cpu2->cache_info + index;
457 				apicid1 = this_leaf->hsa_cache_prop.ProcessorIdLow;
458 				apicid2 = leaf2->hsa_cache_prop.ProcessorIdLow;
459 				if ((apicid2 >> idx_msb) != (apicid1 >> idx_msb))
460 					continue;
461 				/* A sibling leaf is found. Cache properties
462 				 * use ProcIdLow as offset to represent siblings
463 				 * in SiblingMap, so keep the lower apicid and
464 				 * delete the other by changing CacheSize to 0.
465 				 */
466 				if (apicid1 < apicid2) {
467 					this_leaf->hsa_cache_prop.SiblingMap[0] = 1;
468 					this_leaf->hsa_cache_prop.SiblingMap[apicid2 - apicid1] = 1;
469 					leaf2->hsa_cache_prop.CacheSize = 0;
470 					cpu2->num_duplicated_caches++;
471 				} else {
472 					leaf2->hsa_cache_prop.SiblingMap[0] = 1;
473 					leaf2->hsa_cache_prop.SiblingMap[apicid1 - apicid2] = 1;
474 					this_leaf->hsa_cache_prop.CacheSize = 0;
475 					this_cpu->num_duplicated_caches++;
476 				}
477 			}
478 		}
479 	}
480 }
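/* Worked example of the sibling test above, with hypothetical APIC IDs:
 * a leaf with num_threads_sharing == 2 gives idx_msb == 1, so APIC IDs 6
 * and 7 compare equal after the shift (6 >> 1 == 7 >> 1 == 3) and are
 * treated as siblings. Assuming the CPU with APIC ID 6 comes first in the
 * list, its leaf keeps the cache and sets SiblingMap[0] and SiblingMap[1],
 * while the duplicate leaf for APIC ID 7 has its CacheSize zeroed.
 */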
481 #endif /* X86 platform */
482 
483 static HSAKMT_STATUS topology_sysfs_get_generation(uint32_t *gen)
484 {
485 	FILE *fd;
486 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
487 
488 	assert(gen);
489 	fd = fopen(KFD_SYSFS_PATH_GENERATION_ID, "r");
490 	if (!fd)
491 		return HSAKMT_STATUS_ERROR;
492 	if (fscanf(fd, "%u", gen) != 1) {
493 		ret = HSAKMT_STATUS_ERROR;
494 		goto err;
495 	}
496 
497 err:
498 	fclose(fd);
499 	return ret;
500 }
501 
502 HSAKMT_STATUS topology_sysfs_get_system_props(HsaSystemProperties *props)
503 {
504 	FILE *fd;
505 	char *read_buf, *p;
506 	char prop_name[256];
507 	unsigned long long prop_val;
508 	uint32_t prog;
509 	int read_size;
510 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
511 
512 
513 	assert(props);
514 	fd = fopen(KFD_SYSFS_PATH_SYSTEM_PROPERTIES, "r");
515 	if (!fd)
516 		return HSAKMT_STATUS_ERROR;
517 
518 	read_buf = malloc(PAGE_SIZE);
519 	if (!read_buf) {
520 		ret = HSAKMT_STATUS_NO_MEMORY;
521 		goto err1;
522 	}
523 
524 	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
525 	if (read_size <= 0) {
526 		ret = HSAKMT_STATUS_ERROR;
527 		goto err2;
528 	}
529 
530 	/* Since we're using the buffer as a string, we make sure the string terminates */
531 	if (read_size >= PAGE_SIZE)
532 		read_size = PAGE_SIZE - 1;
533 	read_buf[read_size] = 0;
534 
535 	/* Read the system properties */
536 	prog = 0;
537 	p = read_buf;
538 	while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
539 		if (strcmp(prop_name, "platform_oem") == 0)
540 			props->PlatformOem = (uint32_t)prop_val;
541 		else if (strcmp(prop_name, "platform_id") == 0)
542 			props->PlatformId = (uint32_t)prop_val;
543 		else if (strcmp(prop_name, "platform_rev") == 0)
544 			props->PlatformRev = (uint32_t)prop_val;
545 	}
546 
547 	/*
548 	 * Discover the number of nodes:
549 	 * Assuming the nodes directory contains only subdirectories,
550 	 * one per node number
551 	 */
552 	props->NumNodes = num_subdirs(KFD_SYSFS_PATH_NODES, "");
553 
554 err2:
555 	free(read_buf);
556 err1:
557 	fclose(fd);
558 	return ret;
559 }
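/* Illustrative sketch of the parsing loop above. The system_properties
 * file holds "name value" pairs, one per line, e.g. (values hypothetical):
 *
 *	platform_oem 0
 *	platform_id 0
 *	platform_rev 1
 *
 * The format "%s %llu\n%n" consumes one pair per iteration and stores the
 * number of characters consumed in prog, so p += prog walks the buffer
 * line by line until the format no longer matches.
 */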
560 
561 HSAKMT_STATUS topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id)
562 {
563 	FILE *fd;
564 	char path[256];
565 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
566 
567 	assert(gpu_id);
568 	snprintf(path, 256, "%s/%d/gpu_id", KFD_SYSFS_PATH_NODES, node_id);
569 	fd = fopen(path, "r");
570 	if (!fd)
571 		return HSAKMT_STATUS_ERROR;
572 	if (fscanf(fd, "%u", gpu_id) != 1)
573 		ret = HSAKMT_STATUS_ERROR;
574 	fclose(fd);
575 
576 	return ret;
577 }
578 
579 static const struct hsa_gfxip_table *find_hsa_gfxip_device(uint16_t device_id)
580 {
581 	uint32_t i, table_size;
582 
583 	table_size = sizeof(gfxip_lookup_table)/sizeof(struct hsa_gfxip_table);
584 	for (i = 0; i < table_size; i++) {
585 		if (gfxip_lookup_table[i].device_id == device_id)
586 			return &gfxip_lookup_table[i];
587 	}
588 	return NULL;
589 }
590 
591 HSAKMT_STATUS topology_get_asic_family(uint16_t device_id,
592 					enum asic_family_type *asic)
593 {
594 	const struct hsa_gfxip_table *hsa_gfxip =
595 				find_hsa_gfxip_device(device_id);
596 
597 	if (!hsa_gfxip)
598 		return HSAKMT_STATUS_INVALID_PARAMETER;
599 
600 	*asic = hsa_gfxip->asic_family;
601 	return HSAKMT_STATUS_SUCCESS;
602 }
603 
604 bool topology_is_dgpu(uint16_t device_id)
605 {
606 	const struct hsa_gfxip_table *hsa_gfxip =
607 				find_hsa_gfxip_device(device_id);
608 
609 	if (hsa_gfxip && hsa_gfxip->is_dgpu) {
610 		is_dgpu = true;
611 		return true;
612 	}
613 	is_dgpu = false;
614 	return false;
615 }
616 
617 bool topology_is_svm_needed(uint16_t device_id)
618 {
619 	const struct hsa_gfxip_table *hsa_gfxip;
620 
621 	if (topology_is_dgpu(device_id))
622 		return true;
623 
624 	hsa_gfxip = find_hsa_gfxip_device(device_id);
625 
626 	if (hsa_gfxip && hsa_gfxip->asic_family >= CHIP_VEGA10)
627 		return true;
628 
629 	return false;
630 }
631 
632 static HSAKMT_STATUS topology_get_cpu_model_name(HsaNodeProperties *props,
633 						 bool is_apu)
634 {
635 	FILE *fd;
636 	char read_buf[256], cpu_model_name[HSA_PUBLIC_NAME_SIZE];
637 	const char *p;
638 	uint32_t i = 0, apic_id = 0;
639 
640 	if (!props)
641 		return HSAKMT_STATUS_INVALID_PARAMETER;
642 
643 	fd = fopen(PROC_CPUINFO_PATH, "r");
644 	if (!fd) {
645 		pr_err("Failed to open [%s]. Unable to get CPU Model Name",
646 			PROC_CPUINFO_PATH);
647 		return HSAKMT_STATUS_ERROR;
648 	}
649 
650 	while (fgets(read_buf, sizeof(read_buf), fd)) {
651 		/* Get the model name first, in case matching
652 		 * apic IDs are also present in the file
653 		 */
654 		if (!strncmp("model name", read_buf, sizeof("model name") - 1)) {
655 			p = strrchr(read_buf, ':');
656 			if (!p)
657 				goto err;
658 
659 			p++; // remove separator ':'
660 			for (; isspace(*p); p++)
661 				; /* remove white space */
662 
663 			/* Extract model name from string */
664 			for (i = 0; i < sizeof(cpu_model_name) - 1 && p[i] != '\n'; i++)
665 				cpu_model_name[i] = p[i];
666 			cpu_model_name[i] = '\0';
667 		}
668 
669 		if (!strncmp("apicid", read_buf, sizeof("apicid") - 1)) {
670 			p = strrchr(read_buf, ':');
671 			if (!p)
672 				goto err;
673 
674 			p++; // remove separator ':'
675 			for (; isspace(*p); p++)
676 				; /* remove white space */
677 
678 			/* Extract apic_id from remaining chars */
679 			apic_id = atoi(p);
680 
681 			/* Set CPU model name only if corresponding apic id */
682 			if (props->CComputeIdLo == apic_id) {
683 				/* Retrieve the CAL name of CPU node */
684 				if (!is_apu)
685 					strncpy((char *)props->AMDName, cpu_model_name, sizeof(props->AMDName));
686 				/* Convert from UTF8 to UTF16 */
687 				for (i = 0; cpu_model_name[i] != '\0' && i < HSA_PUBLIC_NAME_SIZE - 1; i++)
688 					props->MarketingName[i] = cpu_model_name[i];
689 				props->MarketingName[i] = '\0';
690 			}
691 		}
692 	}
693 	fclose(fd);
694 	return HSAKMT_STATUS_SUCCESS;
695 err:
696 	fclose(fd);
697 	return HSAKMT_STATUS_ERROR;
698 }
699 
700 static int topology_search_processor_vendor(const char *processor_name)
701 {
702 	unsigned int i;
703 
704 	for (i = 0; i < ARRAY_LEN(supported_processor_vendor_name); i++) {
705 		if (!strcmp(processor_name, supported_processor_vendor_name[i]))
706 			return i;
707 	}
708 	return -1;
709 }
710 
711 /* topology_set_processor_vendor - Parse /proc/cpuinfo to find the processor
712  *  vendor and set the global variable processor_vendor accordingly.
713  *
714  *  /proc/cpuinfo lines have the format "token       : Value",
715  *  where token = "vendor_id" and
716  *        Value = the processor vendor string
717  */
718 static void topology_set_processor_vendor(void)
719 {
720 	FILE *fd;
721 	char read_buf[256];
722 	const char *p;
723 
724 	fd = fopen(PROC_CPUINFO_PATH, "r");
725 	if (!fd) {
726 		pr_err("Failed to open [%s]. Setting Processor Vendor to %s",
727 			PROC_CPUINFO_PATH, supported_processor_vendor_name[GENUINE_INTEL]);
728 		processor_vendor = GENUINE_INTEL;
729 		return;
730 	}
731 
732 	while (fgets(read_buf, sizeof(read_buf), fd)) {
733 		if (!strncmp("vendor_id", read_buf, sizeof("vendor_id") - 1)) {
734 			p = strrchr(read_buf, ':');
735 			p++; // remove separator ':'
736 			for (; *p && isspace(*p); p++)
737 				;	/* remove white space */
738 			processor_vendor = topology_search_processor_vendor(p);
739 			if (processor_vendor != -1) {
740 				fclose(fd);
741 				return;
742 			}
743 		}
744 	}
745 	fclose(fd);
746 	pr_err("Failed to get Processor Vendor. Setting to %s",
747 		supported_processor_vendor_name[GENUINE_INTEL]);
748 	processor_vendor = GENUINE_INTEL;
749 }
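/* Illustrative /proc/cpuinfo excerpt (contents vary by machine) showing
 * the line this parser looks for:
 *
 *	vendor_id	: AuthenticAMD
 *
 * After skipping the ':' and the following whitespace, p points at
 * "AuthenticAMD\n", which matches an entry in
 * supported_processor_vendor_name[] because those strings deliberately
 * keep the trailing newline.
 */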
750 
751 HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
752 					    HsaNodeProperties *props,
753 					    uint32_t *gpu_id,
754 					    struct pci_access *pacc)
755 {
756 	FILE *fd;
757 	char *read_buf, *p, *envvar, dummy;
758 	char prop_name[256];
759 	char path[256];
760 	unsigned long long prop_val;
761 	uint32_t i, prog, major, minor, step;
762 	int read_size;
763 	const struct hsa_gfxip_table *hsa_gfxip;
764 	char namebuf[HSA_PUBLIC_NAME_SIZE];
765 	const char *name;
766 
767 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
768 
769 	assert(props);
770 	assert(gpu_id);
771 	/* Retrieve the GPU ID */
772 	ret = topology_sysfs_get_gpu_id(node_id, gpu_id);
773 
774 	read_buf = malloc(PAGE_SIZE);
775 	if (!read_buf)
776 		return HSAKMT_STATUS_NO_MEMORY;
777 
778 	/* Retrieve the node properties */
779 	snprintf(path, 256, "%s/%d/properties", KFD_SYSFS_PATH_NODES, node_id);
780 	fd = fopen(path, "r");
781 	if (!fd) {
782 		free(read_buf);
783 		return HSAKMT_STATUS_ERROR;
784 	}
785 
786 	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
787 	if (read_size <= 0) {
788 		ret = HSAKMT_STATUS_ERROR;
789 		goto err;
790 	}
791 
792 	/* Since we're using the buffer as a string, we make sure the string terminates */
793 	if (read_size >= PAGE_SIZE)
794 		read_size = PAGE_SIZE - 1;
795 	read_buf[read_size] = 0;
796 
797 	/* Read the node properties */
798 	prog = 0;
799 	p = read_buf;
800 	while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
801 		if (strcmp(prop_name, "cpu_cores_count") == 0)
802 			props->NumCPUCores = (uint32_t)prop_val;
803 		else if (strcmp(prop_name, "simd_count") == 0)
804 			props->NumFComputeCores = (uint32_t)prop_val;
805 		else if (strcmp(prop_name, "mem_banks_count") == 0)
806 			props->NumMemoryBanks = (uint32_t)prop_val;
807 		else if (strcmp(prop_name, "caches_count") == 0)
808 			props->NumCaches = (uint32_t)prop_val;
809 		else if (strcmp(prop_name, "io_links_count") == 0)
810 			props->NumIOLinks = (uint32_t)prop_val;
811 		else if (strcmp(prop_name, "cpu_core_id_base") == 0)
812 			props->CComputeIdLo = (uint32_t)prop_val;
813 		else if (strcmp(prop_name, "simd_id_base") == 0)
814 			props->FComputeIdLo = (uint32_t)prop_val;
815 		else if (strcmp(prop_name, "capability") == 0)
816 			props->Capability.Value = (uint32_t)prop_val;
817 		else if (strcmp(prop_name, "max_waves_per_simd") == 0)
818 			props->MaxWavesPerSIMD = (uint32_t)prop_val;
819 		else if (strcmp(prop_name, "lds_size_in_kb") == 0)
820 			props->LDSSizeInKB = (uint32_t)prop_val;
821 		else if (strcmp(prop_name, "gds_size_in_kb") == 0)
822 			props->GDSSizeInKB = (uint32_t)prop_val;
823 		else if (strcmp(prop_name, "wave_front_size") == 0)
824 			props->WaveFrontSize = (uint32_t)prop_val;
825 		else if (strcmp(prop_name, "array_count") == 0)
826 			props->NumShaderBanks = (uint32_t)prop_val;
827 		else if (strcmp(prop_name, "simd_arrays_per_engine") == 0)
828 			props->NumArrays = (uint32_t)prop_val;
829 		else if (strcmp(prop_name, "cu_per_simd_array") == 0)
830 			props->NumCUPerArray = (uint32_t)prop_val;
831 		else if (strcmp(prop_name, "simd_per_cu") == 0)
832 			props->NumSIMDPerCU = (uint32_t)prop_val;
833 		else if (strcmp(prop_name, "max_slots_scratch_cu") == 0)
834 			props->MaxSlotsScratchCU = (uint32_t)prop_val;
835 		else if (strcmp(prop_name, "fw_version") == 0)
836 			props->EngineId.Value = (uint32_t)prop_val & 0x3ff;
837 		else if (strcmp(prop_name, "vendor_id") == 0)
838 			props->VendorId = (uint32_t)prop_val;
839 		else if (strcmp(prop_name, "device_id") == 0)
840 			props->DeviceId = (uint32_t)prop_val;
841 		else if (strcmp(prop_name, "location_id") == 0)
842 			props->LocationId = (uint32_t)prop_val;
843 		else if (strcmp(prop_name, "max_engine_clk_fcompute") == 0)
844 			props->MaxEngineClockMhzFCompute = (uint32_t)prop_val;
845 		else if (strcmp(prop_name, "max_engine_clk_ccompute") == 0)
846 			props->MaxEngineClockMhzCCompute = (uint32_t)prop_val;
847 		else if (strcmp(prop_name, "local_mem_size") == 0)
848 			props->LocalMemSize = prop_val;
849 		else if (strcmp(prop_name, "drm_render_minor") == 0)
850 			props->DrmRenderMinor = (int32_t)prop_val;
851 		else if (strcmp(prop_name, "sdma_fw_version") == 0)
852 			props->uCodeEngineVersions.Value = (uint32_t)prop_val & 0x3ff;
853 	}
854 
855 	hsa_gfxip = find_hsa_gfxip_device(props->DeviceId);
856 	if (hsa_gfxip) {
857 		envvar = getenv("HSA_OVERRIDE_GFX_VERSION");
858 		if (envvar) {
859 			/* HSA_OVERRIDE_GFX_VERSION=major.minor.stepping */
860 			if ((sscanf(envvar, "%u.%u.%u%c",
861 					&major, &minor, &step, &dummy) != 3) ||
862 				(major > 63 || minor > 255 || step > 255)) {
863 				pr_err("HSA_OVERRIDE_GFX_VERSION %s is invalid\n",
864 					envvar);
865 				ret = HSAKMT_STATUS_ERROR;
866 				goto err;
867 			}
868 			props->EngineId.ui32.Major = major & 0x3f;
869 			props->EngineId.ui32.Minor = minor & 0xff;
870 			props->EngineId.ui32.Stepping = step & 0xff;
871 		} else {
872 			props->EngineId.ui32.Major = hsa_gfxip->major & 0x3f;
873 			props->EngineId.ui32.Minor = hsa_gfxip->minor;
874 			props->EngineId.ui32.Stepping = hsa_gfxip->stepping;
875 		}
876 
877 		if (!hsa_gfxip->amd_name) {
878 			ret = HSAKMT_STATUS_ERROR;
879 			goto err;
880 		}
881 
882 		/* Retrieve the CAL name of the node */
883 		strncpy((char *)props->AMDName, hsa_gfxip->amd_name, sizeof(props->AMDName));
884 		if (props->NumCPUCores) {
885 			/* Is APU node */
886 			ret = topology_get_cpu_model_name(props, true);
887 			if (ret != HSAKMT_STATUS_SUCCESS) {
888 				pr_err("Failed to get APU Model Name from %s\n", PROC_CPUINFO_PATH);
889 				ret = HSAKMT_STATUS_SUCCESS; /* No hard error, continue regardless */
890 			}
891 		} else {
892 			/* Is dGPU Node
893 			 * Retrieve the marketing name of the node using pcilib,
894 			 * convert UTF8 to UTF16
895 			 */
896 			name = pci_lookup_name(pacc, namebuf, sizeof(namebuf), PCI_LOOKUP_DEVICE,
897 								   props->VendorId, props->DeviceId);
898 			for (i = 0; name[i] != 0 && i < HSA_PUBLIC_NAME_SIZE - 1; i++)
899 				props->MarketingName[i] = name[i];
900 			props->MarketingName[i] = '\0';
901 		}
902 	} else {
903 		/* Is CPU Node */
904 		if (!props->NumFComputeCores || !props->DeviceId) {
905 			ret = topology_get_cpu_model_name(props, false);
906 			if (ret != HSAKMT_STATUS_SUCCESS) {
907 				pr_err("Failed to get CPU Model Name from %s\n", PROC_CPUINFO_PATH);
908 				ret = HSAKMT_STATUS_SUCCESS; /* No hard error, continue regardless */
909 			}
910 		} else {
911 			ret = HSAKMT_STATUS_ERROR;
912 			goto err;
913 		}
914 	}
915 	if (props->NumFComputeCores)
916 		assert(props->EngineId.ui32.Major);
917 
918 err:
919 	free(read_buf);
920 	fclose(fd);
921 	return ret;
922 }
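/* Usage sketch for the HSA_OVERRIDE_GFX_VERSION handling above (the value
 * shown is only an example):
 *
 *	$ HSA_OVERRIDE_GFX_VERSION=9.0.6 ./some_hsa_app
 *
 * parses into EngineId.ui32.Major = 9, Minor = 0, Stepping = 6, overriding
 * the values from gfxip_lookup_table[]. Major is limited to 6 bits (<= 63),
 * Minor and Stepping to 8 bits (<= 255); anything else is rejected.
 */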
923 
924 static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id,
925 						  uint32_t mem_id,
926 						  HsaMemoryProperties *props)
927 {
928 	FILE *fd;
929 	char *read_buf, *p;
930 	char prop_name[256];
931 	char path[256];
932 	unsigned long long prop_val;
933 	uint32_t prog;
934 	int read_size;
935 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
936 
937 	assert(props);
938 	snprintf(path, 256, "%s/%d/mem_banks/%d/properties", KFD_SYSFS_PATH_NODES, node_id, mem_id);
939 	fd = fopen(path, "r");
940 	if (!fd)
941 		return HSAKMT_STATUS_ERROR;
942 	read_buf = malloc(PAGE_SIZE);
943 	if (!read_buf) {
944 		ret = HSAKMT_STATUS_NO_MEMORY;
945 		goto err1;
946 	}
947 
948 	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
949 	if (read_size <= 0) {
950 		ret = HSAKMT_STATUS_ERROR;
951 		goto err2;
952 	}
953 
954 	/* Since we're using the buffer as a string, we make sure the string terminates */
955 	if (read_size >= PAGE_SIZE)
956 		read_size = PAGE_SIZE - 1;
957 	read_buf[read_size] = 0;
958 
959 	prog = 0;
960 	p = read_buf;
961 	while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
962 		if (strcmp(prop_name, "heap_type") == 0)
963 			props->HeapType = (uint32_t)prop_val;
964 		else if (strcmp(prop_name, "size_in_bytes") == 0)
965 			props->SizeInBytes = (uint64_t)prop_val;
966 		else if (strcmp(prop_name, "flags") == 0)
967 			props->Flags.MemoryProperty = (uint32_t)prop_val;
968 		else if (strcmp(prop_name, "width") == 0)
969 			props->Width = (uint32_t)prop_val;
970 		else if (strcmp(prop_name, "mem_clk_max") == 0)
971 			props->MemoryClockMax = (uint32_t)prop_val;
972 	}
973 
974 err2:
975 	free(read_buf);
976 err1:
977 	fclose(fd);
978 	return ret;
979 }
980 
981 #if defined(__x86_64__) || defined(__i386__)
982 /* topology_destroy_temp_cpu_cache_list - Free the memory allocated in
983  *		topology_create_temp_cpu_cache_list().
984  */
985 static void topology_destroy_temp_cpu_cache_list(void *temp_cpu_ci_list)
986 {
987 	uint32_t n;
988 	cpu_cacheinfo_t *p_temp_cpu_ci_list = (cpu_cacheinfo_t *)temp_cpu_ci_list;
989 	cpu_cacheinfo_t *this_cpu;
990 
991 	if (p_temp_cpu_ci_list) {
992 		for (n = 0; n < p_temp_cpu_ci_list->len; n++) {
993 			this_cpu = p_temp_cpu_ci_list + n;
994 			if (this_cpu->cache_info)
995 				free(this_cpu->cache_info);
996 		}
997 		free(p_temp_cpu_ci_list);
998 	}
999 
1000 	p_temp_cpu_ci_list = NULL;
1001 }
1002 
1003 /* topology_create_temp_cpu_cache_list - Create a temporary cpu-cache list to
1004  *		store cpu cache information. This list will be used to copy
1005  *		cache information to each CPU node. Must call
1006  *		topology_destroy_temp_cpu_cache_list to free the memory after
1007  *		the information is copied.
1008  *	@temp_cpu_ci_list - [OUT] temporary cpu-cache-info list to store data
1009  *	Return - HSAKMT_STATUS_SUCCESS in success or error number in failure
1010  */
1011 static HSAKMT_STATUS topology_create_temp_cpu_cache_list(void **temp_cpu_ci_list)
1012 {
1013 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
1014 	void *p_temp_cpu_ci_list;
1015 	int procs_online;
1016 #ifdef __FreeBSD__
1017 	cpuset_t orig_cpuset;
1018 #else
1019 	cpu_set_t orig_cpuset;
1020 #endif
1021 	int i;
1022 	uint32_t cpuid_op_cache;
1023 	uint32_t eax, ebx, ecx = 0, edx; /* cpuid registers */
1024 	cpu_cacheinfo_t *cpu_ci_list, *this_cpu;
1025 
1026 	if (!temp_cpu_ci_list) {
1027 		ret = HSAKMT_STATUS_ERROR;
1028 		goto exit;
1029 	}
1030 	*temp_cpu_ci_list = NULL;
1031 
1032 	procs_online = (int)sysconf(_SC_NPROCESSORS_ONLN);
1033 	if (procs_online <= 0) {
1034 		ret = HSAKMT_STATUS_ERROR;
1035 		goto exit;
1036 	}
1037 
1038 	p_temp_cpu_ci_list = calloc(sizeof(cpu_cacheinfo_t) * procs_online, 1);
1039 	if (!p_temp_cpu_ci_list) {
1040 		ret = HSAKMT_STATUS_NO_MEMORY;
1041 		goto exit;
1042 	}
1043 
1044 	cpu_ci_list = (cpu_cacheinfo_t *)p_temp_cpu_ci_list;
1045 	cpu_ci_list->len = procs_online;
1046 
1047 	if (processor_vendor == AUTHENTIC_AMD)
1048 		cpuid_op_cache = 0x8000001d;
1049 	else
1050 		cpuid_op_cache = 0x4;
1051 
1052 	/* lock_to_processor() changes the affinity. Save the current affinity
1053 	 * so we can restore it after cpuid is done.
1054 	 */
1055 	CPU_ZERO(&orig_cpuset);
1056 #ifdef __FreeBSD__
1057 	if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(orig_cpuset), &orig_cpuset) != 0) {
1058 #else
1059 	if (sched_getaffinity(0, sizeof(cpu_set_t), &orig_cpuset) != 0) {
1060 #endif
1061 		pr_err("Failed to get CPU affinity\n");
1062 		free(p_temp_cpu_ci_list);
1063 		ret = HSAKMT_STATUS_ERROR;
1064 		goto exit;
1065 	}
1066 
1067 	for (i = 0; i < procs_online; i++) {
1068 		this_cpu = cpu_ci_list + i;
1069 		lock_to_processor(i); /* so cpuid is executed in correct cpu */
1070 
1071 		eax = 0x1;
1072 		cpuid(&eax, &ebx, &ecx, &edx);
1073 		this_cpu->apicid = (ebx >> 24) & 0xff;
1074 		this_cpu->max_num_apicid = (ebx >> 16) & 0x0FF;
1075 		this_cpu->num_caches = cpuid_find_num_cache_leaves(cpuid_op_cache);
1076 		this_cpu->num_duplicated_caches = 0;
1077 		this_cpu->cache_info = calloc(
1078 				sizeof(cacheinfo_t) * this_cpu->num_caches, 1);
1079 		if (!this_cpu->cache_info) {
1080 			ret = HSAKMT_STATUS_NO_MEMORY;
1081 			goto err;
1082 		}
1083 		cpuid_get_cpu_cache_info(cpuid_op_cache, this_cpu);
1084 	}
1085 
1086 	find_cpu_cache_siblings(cpu_ci_list);
1087 	*temp_cpu_ci_list = p_temp_cpu_ci_list;
1088 
1089 err:
1090 	/* restore affinity to original */
1091 #ifdef __FreeBSD__
1092 	cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(orig_cpuset), &orig_cpuset);
1093 #else
1094 	sched_setaffinity(0, sizeof(cpu_set_t), &orig_cpuset);
1095 #endif
1096 exit:
1097 	if (ret != HSAKMT_STATUS_SUCCESS) {
1098 		pr_warn("Topology failed to create the CPU cache list\n");
1099 		topology_destroy_temp_cpu_cache_list(*temp_cpu_ci_list);
1100 	}
1101 	return ret;
1102 }
1103 
1104 /* topology_get_cpu_cache_props - Read CPU cache information from the temporary
1105  *		cache list and put them to the node's cache properties entry.
1106  *	@tbl - the node table to fill up
1107  *	@cpu_ci_list - the cpu cache information list to look up cache info
1108  *	Return - HSAKMT_STATUS_SUCCESS in success or error number in failure
1109  */
1110 static HSAKMT_STATUS topology_get_cpu_cache_props(node_t *tbl,
1111 						  cpu_cacheinfo_t *cpu_ci_list)
1112 {
1113 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
1114 	uint32_t apicid_low = tbl->node.CComputeIdLo, apicid_max = 0;
1115 	uint32_t n, cache_cnt, idx;
1116 	cpu_cacheinfo_t *this_cpu;
1117 	cacheinfo_t *this_leaf;
1118 
1119 	/* CPU cache info list contains all CPUs. Find out CPUs belonging to
1120 	 * this node and number of caches under, so we can allocate the cache
1121 	 * properties in the node.
1122 	 */
1123 	tbl->node.NumCaches = 0;
1124 	for (n = 0; n < cpu_ci_list->len; n++) {
1125 		this_cpu = cpu_ci_list + n;
1126 		if (this_cpu->apicid == apicid_low)
1127 			/* found the first cpu in the node */
1128 			apicid_max = apicid_low + this_cpu->max_num_apicid - 1;
1129 
1130 		if ((this_cpu->apicid < apicid_low) ||
1131 			(this_cpu->apicid > apicid_max))
1132 			continue; /* this cpu doesn't belong to the node */
1133 		tbl->node.NumCaches +=
1134 			this_cpu->num_caches - this_cpu->num_duplicated_caches;
1135 	}
1136 
1137 	/* FixMe: cpuid under Valgrind doesn't return data from the processor we set
1138 	 * affinity to. All the data come from one specific processor. We'll report
1139 	 * this one processor's cache and ignore others.
1140 	 */
1141 	if (is_valgrind) {
1142 		this_cpu = cpu_ci_list;
1143 		tbl->node.NumCaches = this_cpu->num_caches;
1144 		apicid_low = apicid_max = this_cpu->apicid;
1145 	}
1146 
1147 	tbl->cache = calloc(
1148 			sizeof(HsaCacheProperties) * tbl->node.NumCaches, 1);
1149 	if (!tbl->cache) {
1150 		ret = HSAKMT_STATUS_NO_MEMORY;
1151 		goto exit;
1152 	}
1153 
1154 	/* Now fill in the information to cache properties. */
1155 	cache_cnt = 0;
1156 	for (n = 0; n < cpu_ci_list->len; n++) {
1157 		this_cpu = cpu_ci_list + n;
1158 		if ((this_cpu->apicid < apicid_low) || this_cpu->apicid > apicid_max)
1159 			continue; /* this cpu doesn't belong to the node */
1160 		for (idx = 0; idx < this_cpu->num_caches; idx++) {
1161 			this_leaf = this_cpu->cache_info + idx;
1162 			if (this_leaf->hsa_cache_prop.CacheSize > 0)
1163 				memcpy(&tbl->cache[cache_cnt++], &this_leaf->hsa_cache_prop, sizeof(HsaCacheProperties));
1164 			if (cache_cnt >= tbl->node.NumCaches)
1165 				goto exit;
1166 		}
1167 	}
1168 
1169 exit:
1170 	return ret;
1171 }
1172 #else /* not X86 */
1173 static void topology_destroy_temp_cpu_cache_list(void *temp_cpu_ci_list)
1174 {
1175 }
1176 
1177 static HSAKMT_STATUS topology_create_temp_cpu_cache_list(void **temp_cpu_ci_list)
1178 {
1179 	return HSAKMT_STATUS_SUCCESS;
1180 }
1181 
1182 static HSAKMT_STATUS topology_get_cpu_cache_props(node_t *tbl,
1183 						  cpu_cacheinfo_t *cpu_ci_list)
1184 {
1185 	return HSAKMT_STATUS_SUCCESS;
1186 }
1187 #endif
1188 
1189 static HSAKMT_STATUS topology_sysfs_get_cache_props(uint32_t node_id,
1190 						    uint32_t cache_id,
1191 						    HsaCacheProperties *props)
1192 {
1193 	FILE *fd;
1194 	char *read_buf, *p;
1195 	char prop_name[256];
1196 	char path[256];
1197 	unsigned long long prop_val;
1198 	uint32_t i, prog;
1199 	int read_size;
1200 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
1201 
1202 	assert(props);
1203 	snprintf(path, 256, "%s/%d/caches/%d/properties", KFD_SYSFS_PATH_NODES, node_id, cache_id);
1204 	fd = fopen(path, "r");
1205 	if (!fd)
1206 		return HSAKMT_STATUS_ERROR;
1207 	read_buf = malloc(PAGE_SIZE);
1208 	if (!read_buf) {
1209 		ret = HSAKMT_STATUS_NO_MEMORY;
1210 		goto err1;
1211 	}
1212 
1213 	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
1214 	if (read_size <= 0) {
1215 		ret = HSAKMT_STATUS_ERROR;
1216 		goto err2;
1217 	}
1218 
1219 	/* Since we're using the buffer as a string, we make sure the string terminates */
1220 	if (read_size >= PAGE_SIZE)
1221 		read_size = PAGE_SIZE - 1;
1222 	read_buf[read_size] = 0;
1223 
1224 	prog = 0;
1225 	p = read_buf;
1226 	while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
1227 		if (strcmp(prop_name, "processor_id_low") == 0)
1228 			props->ProcessorIdLow = (uint32_t)prop_val;
1229 		else if (strcmp(prop_name, "level") == 0)
1230 			props->CacheLevel = (uint32_t)prop_val;
1231 		else if (strcmp(prop_name, "size") == 0)
1232 			props->CacheSize = (uint32_t)prop_val;
1233 		else if (strcmp(prop_name, "cache_line_size") == 0)
1234 			props->CacheLineSize = (uint32_t)prop_val;
1235 		else if (strcmp(prop_name, "cache_lines_per_tag") == 0)
1236 			props->CacheLinesPerTag = (uint32_t)prop_val;
1237 		else if (strcmp(prop_name, "association") == 0)
1238 			props->CacheAssociativity = (uint32_t)prop_val;
1239 		else if (strcmp(prop_name, "latency") == 0)
1240 			props->CacheLatency = (uint32_t)prop_val;
1241 		else if (strcmp(prop_name, "type") == 0)
1242 			props->CacheType.Value = (uint32_t)prop_val;
1243 		else if (strcmp(prop_name, "sibling_map") == 0)
1244 			break;
1245 	}
1246 
1247 	prog = 0;
1248 	if ((sscanf(p, "sibling_map %n", &prog)) == 0 && prog) {
1249 		i = 0;
1250 		while ((i < HSA_CPU_SIBLINGS) &&
1251 			(sscanf(p += prog, "%u%*[,\n]%n", &props->SiblingMap[i++], &prog) == 1))
1252 			continue;
1253 	}
1254 
1255 err2:
1256 	free(read_buf);
1257 err1:
1258 	fclose(fd);
1259 	return ret;
1260 }
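/* Illustrative caches/<id>/properties file (values hypothetical) showing
 * how the two parsing stages above divide the work:
 *
 *	processor_id_low 0
 *	level 2
 *	size 512
 *	sibling_map 1,1,0,0
 *
 * The first loop breaks once it reaches "sibling_map"; the second sscanf()
 * pass then re-reads that line and stores the comma-separated values into
 * props->SiblingMap[].
 */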
1261 
1262 static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id,
1263 						     uint32_t iolink_id,
1264 						     HsaIoLinkProperties *props)
1265 {
1266 	FILE *fd;
1267 	char *read_buf, *p;
1268 	char prop_name[256];
1269 	char path[256];
1270 	unsigned long long prop_val;
1271 	uint32_t prog;
1272 	int read_size;
1273 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
1274 
1275 	assert(props);
1276 	snprintf(path, 256, "%s/%d/io_links/%d/properties", KFD_SYSFS_PATH_NODES, node_id, iolink_id);
1277 	fd = fopen(path, "r");
1278 	if (!fd)
1279 		return HSAKMT_STATUS_ERROR;
1280 	read_buf = malloc(PAGE_SIZE);
1281 	if (!read_buf) {
1282 		ret = HSAKMT_STATUS_NO_MEMORY;
1283 		goto err1;
1284 	}
1285 
1286 	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
1287 	if (read_size <= 0) {
1288 		ret = HSAKMT_STATUS_ERROR;
1289 		goto err2;
1290 	}
1291 
1292 	/* Since we're using the buffer as a string, we make sure the string terminates */
1293 	if (read_size >= PAGE_SIZE)
1294 		read_size = PAGE_SIZE - 1;
1295 	read_buf[read_size] = 0;
1296 
1297 	prog = 0;
1298 	p = read_buf;
1299 	while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
1300 		if (strcmp(prop_name, "type") == 0)
1301 			props->IoLinkType = (uint32_t)prop_val;
1302 		else if (strcmp(prop_name, "version_major") == 0)
1303 			props->VersionMajor = (uint32_t)prop_val;
1304 		else if (strcmp(prop_name, "version_minor") == 0)
1305 			props->VersionMinor = (uint32_t)prop_val;
1306 		else if (strcmp(prop_name, "node_from") == 0)
1307 			props->NodeFrom = (uint32_t)prop_val;
1308 		else if (strcmp(prop_name, "node_to") == 0)
1309 			props->NodeTo = (uint32_t)prop_val;
1310 		else if (strcmp(prop_name, "weight") == 0)
1311 			props->Weight = (uint32_t)prop_val;
1312 		else if (strcmp(prop_name, "min_latency") == 0)
1313 			props->MinimumLatency = (uint32_t)prop_val;
1314 		else if (strcmp(prop_name, "max_latency") == 0)
1315 			props->MaximumLatency = (uint32_t)prop_val;
1316 		else if (strcmp(prop_name, "min_bandwidth") == 0)
1317 			props->MinimumBandwidth = (uint32_t)prop_val;
1318 		else if (strcmp(prop_name, "max_bandwidth") == 0)
1319 			props->MaximumBandwidth = (uint32_t)prop_val;
1320 		else if (strcmp(prop_name, "recommended_transfer_size") == 0)
1321 			props->RecTransferSize = (uint32_t)prop_val;
1322 		else if (strcmp(prop_name, "flags") == 0)
1323 			props->Flags.LinkProperty = (uint32_t)prop_val;
1324 	}
1325 
1326 
1327 err2:
1328 	free(read_buf);
1329 err1:
1330 	fclose(fd);
1331 	return ret;
1332 }
1333 
1334 /* topology_get_free_io_link_slot_for_node - For the given node_id, find the
1335  * next available free slot to add an io_link
1336  */
1337 static HsaIoLinkProperties *topology_get_free_io_link_slot_for_node(uint32_t node_id,
1338 								    const HsaSystemProperties *sys_props,
1339 								    node_t *nodes)
1340 {
1341 	HsaIoLinkProperties *props;
1342 
1343 	if (node_id >= sys_props->NumNodes) {
1344 		pr_err("Invalid node [%d]\n", node_id);
1345 		return NULL;
1346 	}
1347 
1348 	props = nodes[node_id].link;
1349 	if (!props) {
1350 		pr_err("No io_link reported for Node [%d]\n", node_id);
1351 		return NULL;
1352 	}
1353 
1354 	if (nodes[node_id].node.NumIOLinks >= sys_props->NumNodes - 1) {
1355 		pr_err("No more space for io_link for Node [%d]\n", node_id);
1356 		return NULL;
1357 	}
1358 
1359 	return &props[nodes[node_id].node.NumIOLinks];
1360 }
1361 
1362 /* topology_add_io_link_for_node - If a free slot is available,
1363  * add io_link for the given Node. If bi_directional is true, set up two
1364  * links for both directions.
1365  * TODO: Add other members of HsaIoLinkProperties
1366  */
1367 static HSAKMT_STATUS topology_add_io_link_for_node(uint32_t node_id,
1368 						   const HsaSystemProperties *sys_props,
1369 						   node_t *nodes,
1370 						   HSA_IOLINKTYPE IoLinkType,
1371 						   uint32_t NodeTo,
1372 						   uint32_t Weight, bool bi_dir)
1373 {
1374 	HsaIoLinkProperties *props;
1375 	/* If bi_dir is true, add two links, one for each direction. */
1376 	uint32_t i, num_links = (bi_dir == true) ? 2 : 1;
1377 	uint32_t node_from = node_id, node_to = NodeTo;
1378 
1379 	for (i = 0; i < num_links; i++) {
1380 		props = topology_get_free_io_link_slot_for_node(node_from,
1381 			sys_props, nodes);
1382 		if (!props)
1383 			return HSAKMT_STATUS_NO_MEMORY;
1384 
1385 		props->IoLinkType = IoLinkType;
1386 		props->NodeFrom = node_from;
1387 		props->NodeTo = node_to;
1388 		props->Weight = Weight;
1389 		nodes[node_from].node.NumIOLinks++;
1390 		/* switch direction on the 2nd link when num_links=2 */
1391 		node_from = NodeTo;
1392 		node_to = node_id;
1393 	}
1394 
1395 	return HSAKMT_STATUS_SUCCESS;
1396 }
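/* Illustrative call (node numbers and weight are hypothetical): create a
 * bidirectional PCIe link between node 0 (a CPU) and node 2 (a GPU):
 *
 *	topology_add_io_link_for_node(0, sys_props, nodes,
 *				      HSA_IOLINKTYPE_PCIEXPRESS, 2, 20, true);
 *
 * With bi_dir == true the loop runs twice, filling one 0->2 link and one
 * 2->0 link and bumping NumIOLinks on both nodes.
 */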
1397 
1398 /* Find the CPU that this GPU (gpu_node) directly connects to */
1399 static int32_t gpu_get_direct_link_cpu(uint32_t gpu_node, node_t *nodes)
1400 {
1401 	HsaIoLinkProperties *props = nodes[gpu_node].link;
1402 	uint32_t i;
1403 
1404 	if (!nodes[gpu_node].gpu_id || !props ||
1405 			nodes[gpu_node].node.NumIOLinks == 0)
1406 		return -1;
1407 
1408 	for (i = 0; i < nodes[gpu_node].node.NumIOLinks; i++)
1409 		if (props[i].IoLinkType == HSA_IOLINKTYPE_PCIEXPRESS &&
1410 			props[i].Weight <= 20) /* >20 is GPU->CPU->GPU */
1411 			return props[i].NodeTo;
1412 
1413 	return -1;
1414 }
1415 
1416 /* Get node1->node2 IO link information. This should be a direct link that has
1417  * been created in the kernel.
1418  */
1419 static HSAKMT_STATUS get_direct_iolink_info(uint32_t node1, uint32_t node2,
1420 					    node_t *nodes, HSAuint32 *weight,
1421 					    HSA_IOLINKTYPE *type)
1422 {
1423 	HsaIoLinkProperties *props = nodes[node1].link;
1424 	uint32_t i;
1425 
1426 	if (!props)
1427 		return HSAKMT_STATUS_INVALID_NODE_UNIT;
1428 
1429 	for (i = 0; i < nodes[node1].node.NumIOLinks; i++)
1430 		if (props[i].NodeTo == node2) {
1431 			if (weight)
1432 				*weight = props[i].Weight;
1433 			if (type)
1434 				*type = props[i].IoLinkType;
1435 			return HSAKMT_STATUS_SUCCESS;
1436 		}
1437 
1438 	return HSAKMT_STATUS_INVALID_PARAMETER;
1439 }
1440 
1441 static HSAKMT_STATUS get_indirect_iolink_info(uint32_t node1, uint32_t node2,
1442 					      node_t *nodes, HSAuint32 *weight,
1443 					      HSA_IOLINKTYPE *type)
1444 {
1445 	int32_t dir_cpu1 = -1, dir_cpu2 = -1;
1446 	HSAuint32 weight1 = 0, weight2 = 0, weight3 = 0;
1447 	HSAKMT_STATUS ret;
1448 
1449 	*weight = 0;
1450 	*type = HSA_IOLINKTYPE_UNDEFINED;
1451 
1452 	if (node1 == node2)
1453 		return HSAKMT_STATUS_INVALID_PARAMETER;
1454 
1455 	/* CPU->CPU is not an indirect link */
1456 	if (!nodes[node1].gpu_id && !nodes[node2].gpu_id)
1457 		return HSAKMT_STATUS_INVALID_NODE_UNIT;
1458 
1459 	if (nodes[node1].gpu_id)
1460 		dir_cpu1 = gpu_get_direct_link_cpu(node1, nodes);
1461 	if (nodes[node2].gpu_id)
1462 		dir_cpu2 = gpu_get_direct_link_cpu(node2, nodes);
1463 
1464 	if (dir_cpu1 < 0 && dir_cpu2 < 0)
1465 		return HSAKMT_STATUS_ERROR;
1466 
1467 	/* Possible topology:
1468 	 *   GPU --(weight1) -- CPU -- (weight2) -- GPU
1469 	 *   GPU --(weight1) -- CPU -- (weight2) -- CPU -- (weight3) -- GPU
1470 	 *   GPU --(weight1) -- CPU -- (weight2) -- CPU
1471 	 *   CPU -- (weight2) -- CPU -- (weight3) -- GPU
1472 	 */
1473 	if (dir_cpu1 >= 0) { /* GPU->CPU ... */
1474 		if (dir_cpu2 >= 0) {
1475 			if (dir_cpu1 == dir_cpu2) /* GPU->CPU->GPU*/ {
1476 				ret = get_direct_iolink_info(node1, dir_cpu1,
1477 						nodes, &weight1, NULL);
1478 				if (ret != HSAKMT_STATUS_SUCCESS)
1479 					return ret;
1480 				ret = get_direct_iolink_info(dir_cpu1, node2,
1481 						nodes, &weight2, type);
1482 			} else /* GPU->CPU->CPU->GPU*/ {
1483 				ret = get_direct_iolink_info(node1, dir_cpu1,
1484 						nodes, &weight1, NULL);
1485 				if (ret != HSAKMT_STATUS_SUCCESS)
1486 					return ret;
1487 				ret = get_direct_iolink_info(dir_cpu1, dir_cpu2,
1488 						nodes, &weight2, type);
1489 				if (ret != HSAKMT_STATUS_SUCCESS)
1490 					return ret;
1491 				/* On QPI interconnection, GPUs can't access
1492 				 * each other if they are attached to different
1493 				 * CPU sockets. CPU<->CPU weight larger than 20
1494 				 * means the two CPUs are in different sockets.
1495 				 */
1496 				if (*type == HSA_IOLINK_TYPE_QPI_1_1
1497 					&& weight2 > 20)
1498 					return HSAKMT_STATUS_NOT_SUPPORTED;
1499 				ret = get_direct_iolink_info(dir_cpu2, node2,
1500 						nodes, &weight3, NULL);
1501 			}
1502 		} else /* GPU->CPU->CPU */ {
1503 			ret = get_direct_iolink_info(node1, dir_cpu1, nodes,
1504 							&weight1, NULL);
1505 			if (ret != HSAKMT_STATUS_SUCCESS)
1506 				return ret;
1507 			ret = get_direct_iolink_info(dir_cpu1, node2, nodes,
1508 							&weight2, type);
1509 		}
1510 	} else { /* CPU->CPU->GPU */
1511 		ret = get_direct_iolink_info(node1, dir_cpu2, nodes, &weight2,
1512 					type);
1513 		if (ret != HSAKMT_STATUS_SUCCESS)
1514 			return ret;
1515 		ret = get_direct_iolink_info(dir_cpu2, node2, nodes, &weight3,
1516 						NULL);
1517 	}
1518 
1519 	if (ret != HSAKMT_STATUS_SUCCESS)
1520 		return ret;
1521 
1522 	*weight = weight1 + weight2 + weight3;
1523 	return HSAKMT_STATUS_SUCCESS;
1524 }
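/* Worked example of the weight accumulation above, with hypothetical
 * weights: GPU node 1 --(20)-- CPU node 0 --(20)-- GPU node 2. Both GPUs
 * resolve to the same direct CPU (dir_cpu1 == dir_cpu2 == 0), so the
 * GPU->CPU->GPU branch applies: the indirect link gets weight
 * 20 + 20 = 40, with the type taken from the CPU->GPU leg.
 */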
1525 
1526 static void topology_create_indirect_gpu_links(const HsaSystemProperties *sys_props,
1527 					       node_t *nodes)
1528 {
1529 
1530 	uint32_t i, j;
1531 	HSAuint32 weight;
1532 	HSA_IOLINKTYPE type;
1533 
1534 	for (i = 0; i < sys_props->NumNodes - 1; i++) {
1535 		for (j = i + 1; j < sys_props->NumNodes; j++) {
1536 			get_indirect_iolink_info(i, j, nodes, &weight, &type);
1537 			if (!weight)
1538 				continue;
1539 			if (topology_add_io_link_for_node(i, sys_props, nodes,
1540 				type, j, weight, true) != HSAKMT_STATUS_SUCCESS)
1541 				pr_err("Failed to add IO link %d->%d\n", i, j);
1542 		}
1543 	}
1544 }
1545 
1546 HSAKMT_STATUS topology_take_snapshot(void)
1547 {
1548 	uint32_t gen_start, gen_end, i, mem_id, cache_id, link_id;
1549 	HsaSystemProperties sys_props;
1550 	node_t *temp_nodes = 0;
1551 	void *cpu_ci_list = NULL;
1552 	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
1553 	struct pci_access *pacc;
1554 	char *envvar;
1555 
1556 	topology_set_processor_vendor();
1557 	envvar = getenv("HSA_RUNNING_UNDER_VALGRIND");
1558 	if (envvar && !strcmp(envvar, "1"))
1559 		is_valgrind = 1;
1560 	else
1561 		is_valgrind = 0;
1562 
1563 retry:
1564 	ret = topology_sysfs_get_generation(&gen_start);
1565 	if (ret != HSAKMT_STATUS_SUCCESS)
1566 		return ret;
1567 	ret = topology_sysfs_get_system_props(&sys_props);
1568 	if (ret != HSAKMT_STATUS_SUCCESS)
1569 		return ret;
1570 	if (sys_props.NumNodes > 0) {
1571 		topology_create_temp_cpu_cache_list(&cpu_ci_list);
1572 		temp_nodes = calloc(sys_props.NumNodes * sizeof(node_t), 1);
1573 		if (!temp_nodes)
1574 			return HSAKMT_STATUS_NO_MEMORY;
1575 		pacc = pci_alloc();
1576 		pci_init(pacc);
1577 		for (i = 0; i < sys_props.NumNodes; i++) {
1578 			ret = topology_sysfs_get_node_props(i,
1579 					&temp_nodes[i].node,
1580 					&temp_nodes[i].gpu_id, pacc);
1581 			if (ret != HSAKMT_STATUS_SUCCESS) {
1582 				free_nodes(temp_nodes, i);
1583 				goto err;
1584 			}
1585 			if (temp_nodes[i].node.NumMemoryBanks) {
1586 				temp_nodes[i].mem = calloc(temp_nodes[i].node.NumMemoryBanks * sizeof(HsaMemoryProperties), 1);
1587 				if (!temp_nodes[i].mem) {
1588 					ret = HSAKMT_STATUS_NO_MEMORY;
1589 					free_nodes(temp_nodes, i + 1);
1590 					goto err;
1591 				}
1592 				for (mem_id = 0; mem_id < temp_nodes[i].node.NumMemoryBanks; mem_id++) {
1593 					ret = topology_sysfs_get_mem_props(i, mem_id, &temp_nodes[i].mem[mem_id]);
1594 					if (ret != HSAKMT_STATUS_SUCCESS) {
1595 						free_nodes(temp_nodes, i + 1);
1596 						goto err;
1597 					}
1598 				}
1599 			}
1600 
1601 			if (temp_nodes[i].node.NumCaches) {
1602 				temp_nodes[i].cache = calloc(temp_nodes[i].node.NumCaches * sizeof(HsaCacheProperties), 1);
1603 				if (!temp_nodes[i].cache) {
1604 					ret = HSAKMT_STATUS_NO_MEMORY;
1605 					free_nodes(temp_nodes, i + 1);
1606 					goto err;
1607 				}
1608 				for (cache_id = 0; cache_id < temp_nodes[i].node.NumCaches; cache_id++) {
1609 					ret = topology_sysfs_get_cache_props(i, cache_id, &temp_nodes[i].cache[cache_id]);
1610 					if (ret != HSAKMT_STATUS_SUCCESS) {
1611 						free_nodes(temp_nodes, i + 1);
1612 						goto err;
1613 					}
1614 				}
1615 			} else if (!temp_nodes[i].gpu_id) { /* a CPU node */
1616 				ret = topology_get_cpu_cache_props(
1617 						&temp_nodes[i], cpu_ci_list);
1618 				if (ret != HSAKMT_STATUS_SUCCESS) {
1619 					free_nodes(temp_nodes, i + 1);
1620 					goto err;
1621 				}
1622 			}
1623 
			/* To keep things simple, allocate the maximum number of
			 * io_links each node could need. This removes the need
			 * to realloc when indirect and QPI links are added later.
			 */
1627 			temp_nodes[i].link = calloc(sys_props.NumNodes - 1, sizeof(HsaIoLinkProperties));
1628 			if (!temp_nodes[i].link) {
1629 				ret = HSAKMT_STATUS_NO_MEMORY;
1630 				free_nodes(temp_nodes, i + 1);
1631 				goto err;
1632 			}
1633 
1634 			if (temp_nodes[i].node.NumIOLinks) {
1635 				for (link_id = 0; link_id < temp_nodes[i].node.NumIOLinks; link_id++) {
1636 					ret = topology_sysfs_get_iolink_props(i, link_id, &temp_nodes[i].link[link_id]);
1637 					if (ret != HSAKMT_STATUS_SUCCESS) {
						free_nodes(temp_nodes, i + 1);
1639 						goto err;
1640 					}
1641 				}
1642 			}
1643 		}
1644 		pci_cleanup(pacc);
1645 	}
1646 
1647 	/* All direct IO links are created in the kernel. Here we need to
1648 	 * connect GPU<->GPU or GPU<->CPU indirect IO links.
1649 	 */
1650 	topology_create_indirect_gpu_links(&sys_props, temp_nodes);
1651 
1652 	ret = topology_sysfs_get_generation(&gen_end);
1653 	if (ret != HSAKMT_STATUS_SUCCESS) {
1654 		free_nodes(temp_nodes, sys_props.NumNodes);
1655 		goto err;
1656 	}
1657 
1658 	if (gen_start != gen_end) {
1659 		free_nodes(temp_nodes, sys_props.NumNodes);
		temp_nodes = NULL;
1661 		goto retry;
1662 	}
1663 
	if (!_system) {
		_system = malloc(sizeof(HsaSystemProperties));
		if (!_system) {
			free_nodes(temp_nodes, sys_props.NumNodes);
			ret = HSAKMT_STATUS_NO_MEMORY;
			goto err;
		}
	} else if (node) {
		/* Free the previous snapshot's per-node allocations (mem,
		 * cache, link), not just the node array, before replacing it.
		 */
		free_nodes(node, _system->NumNodes);
		node = NULL;
	}

	*_system = sys_props;
	node = temp_nodes;
1676 err:
1677 	topology_destroy_temp_cpu_cache_list(cpu_ci_list);
1678 	return ret;
1679 }
1680 
/* Drop the snapshot of the HSA topology information. Assume lock is held. */
1682 HSAKMT_STATUS topology_drop_snapshot(void)
1683 {
1684 	HSAKMT_STATUS err;
1685 
	if (!!_system != !!node) {
		pr_warn("Inconsistent topology state: _system and node must either both be set or both be NULL\n");
		err = HSAKMT_STATUS_SUCCESS;
		goto out;
	}
1691 
1692 	if (node) {
1693 		/* Remove state */
1694 		free_nodes(node, _system->NumNodes);
1695 		node = NULL;
1696 	}
1697 
1698 	free(_system);
1699 	_system = NULL;
1700 	err = HSAKMT_STATUS_SUCCESS;
1701 
1702 out:
1703 	return err;
1704 }
1705 
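/* Check that nodeid is within the current snapshot and, if gpu_id is not NULL,
 * return the node's gpu_id (0 for CPU-only nodes).
 */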
1706 HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id)
1707 {
1708 	if (!node || !_system || _system->NumNodes <= nodeid)
1709 		return HSAKMT_STATUS_INVALID_NODE_UNIT;
1710 	if (gpu_id)
1711 		*gpu_id = node[nodeid].gpu_id;
1712 
1713 	return HSAKMT_STATUS_SUCCESS;
1714 }
1715 
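/* Reverse lookup: return in *node_id the index of the topology node that owns
 * the given gpu_id.
 */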
1716 HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id)
1717 {
	uint64_t node_idx;

	if (!node || !_system)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	for (node_idx = 0; node_idx < _system->NumNodes; node_idx++) {
		if (node[node_idx].gpu_id == gpu_id) {
			*node_id = node_idx;
			return HSAKMT_STATUS_SUCCESS;
		}
	}

	return HSAKMT_STATUS_INVALID_NODE_UNIT;
}
1730 
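/* Take (or refresh) the topology snapshot under hsakmt_mutex and hand back a
 * copy of the system properties. A minimal usage sketch, assuming the KFD has
 * already been opened (CHECK_KFD_OPEN() enforces this) and with error handling
 * elided:
 *
 *	HsaSystemProperties props;
 *	if (hsaKmtAcquireSystemProperties(&props) == HSAKMT_STATUS_SUCCESS) {
 *		... query props.NumNodes nodes with hsaKmtGetNodeProperties() ...
 *		hsaKmtReleaseSystemProperties();
 *	}
 */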
1731 HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties)
1732 {
1733 	HSAKMT_STATUS err;
1734 
1735 	CHECK_KFD_OPEN();
1736 
1737 	if (!SystemProperties)
1738 		return HSAKMT_STATUS_INVALID_PARAMETER;
1739 
1740 	pthread_mutex_lock(&hsakmt_mutex);
1741 
1742 	err = topology_take_snapshot();
1743 	if (err != HSAKMT_STATUS_SUCCESS)
1744 		goto out;
1745 
1746 	assert(_system);
1747 
1748 	*SystemProperties = *_system;
1749 	err = HSAKMT_STATUS_SUCCESS;
1750 
1751 out:
1752 	pthread_mutex_unlock(&hsakmt_mutex);
1753 	return err;
1754 }
1755 
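/* Drop the snapshot taken by hsaKmtAcquireSystemProperties(). */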
1756 HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void)
1757 {
1758 	CHECK_KFD_OPEN();
1759 
1760 	HSAKMT_STATUS err;
1761 
1762 	pthread_mutex_lock(&hsakmt_mutex);
1763 
1764 	err = topology_drop_snapshot();
1765 
1766 	pthread_mutex_unlock(&hsakmt_mutex);
1767 
1768 	return err;
1769 }
1770 
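/* Copy the cached properties of NodeId into *NodeProperties. For GPU nodes,
 * NumMemoryBanks is bumped by NUM_OF_DGPU_HEAPS or NUM_OF_IGPU_HEAPS to cover
 * the extra heaps that hsaKmtGetNodeMemoryProperties() reports on top of the
 * sysfs memory banks.
 */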
1771 HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId,
1772 						HsaNodeProperties *NodeProperties)
1773 {
1774 	HSAKMT_STATUS err;
1775 	uint32_t gpu_id;
1776 
1777 	if (!NodeProperties)
1778 		return HSAKMT_STATUS_INVALID_PARAMETER;
1779 
1780 	CHECK_KFD_OPEN();
1781 	pthread_mutex_lock(&hsakmt_mutex);
1782 
1783 	/* KFD ADD page 18, snapshot protocol violation */
1784 	if (!_system) {
1785 		err = HSAKMT_STATUS_INVALID_NODE_UNIT;
1786 		assert(_system);
1787 		goto out;
1788 	}
1789 
1790 	if (NodeId >= _system->NumNodes) {
1791 		err = HSAKMT_STATUS_INVALID_PARAMETER;
1792 		goto out;
1793 	}
1794 
	err = validate_nodeid(NodeId, &gpu_id);
	if (err != HSAKMT_STATUS_SUCCESS)
		goto out;
1798 
1799 	*NodeProperties = node[NodeId].node;
	/* For a CPU-only node, don't add any additional GPU memory banks. */
1801 	if (gpu_id) {
1802 		if (topology_is_dgpu(get_device_id_by_gpu_id(gpu_id)))
1803 			NodeProperties->NumMemoryBanks += NUM_OF_DGPU_HEAPS;
1804 		else
1805 			NodeProperties->NumMemoryBanks += NUM_OF_IGPU_HEAPS;
1806 	}
1807 	err = HSAKMT_STATUS_SUCCESS;
1808 
1809 out:
1810 	pthread_mutex_unlock(&hsakmt_mutex);
1811 	return err;
1812 }
1813 
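/* Copy up to NumBanks memory bank descriptors for NodeId. The banks parsed
 * from sysfs come first; for GPU nodes the thunk then appends, while NumBanks
 * leaves room, the LDS aperture, the local frame buffer (APUs only - dGPUs
 * already report it via sysfs), the scratch aperture and, on dGPUs, the SVM
 * aperture.
 */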
1814 HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
1815 						      HSAuint32 NumBanks,
1816 						      HsaMemoryProperties *MemoryProperties)
1817 {
1818 	HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS;
1819 	uint32_t i, gpu_id;
1820 	HSAuint64 aperture_limit;
1821 	bool nodeIsDGPU;
1822 
1823 	if (!MemoryProperties)
1824 		return HSAKMT_STATUS_INVALID_PARAMETER;
1825 
1826 	CHECK_KFD_OPEN();
1827 	pthread_mutex_lock(&hsakmt_mutex);
1828 
1829 	/* KFD ADD page 18, snapshot protocol violation */
1830 	if (!_system) {
1831 		err = HSAKMT_STATUS_INVALID_NODE_UNIT;
1832 		assert(_system);
1833 		goto out;
1834 	}
1835 
	/* This explicit bounds check is still needed so that an out-of-range
	 * NodeId is reported as an invalid parameter rather than as the
	 * invalid-node error returned by validate_nodeid() below.
	 */
1837 	if (NodeId >= _system->NumNodes) {
1838 		err = HSAKMT_STATUS_INVALID_PARAMETER;
1839 		goto out;
1840 	}
1841 
1842 	err = validate_nodeid(NodeId, &gpu_id);
1843 	if (err != HSAKMT_STATUS_SUCCESS)
1844 		goto out;
1845 
1846 	memset(MemoryProperties, 0, NumBanks * sizeof(HsaMemoryProperties));
1847 
1848 	for (i = 0; i < MIN(node[NodeId].node.NumMemoryBanks, NumBanks); i++) {
1849 		assert(node[NodeId].mem);
1850 		MemoryProperties[i] = node[NodeId].mem[i];
1851 	}
1852 
	/* The following memory banks do not apply to CPU-only nodes */
1854 	if (gpu_id == 0)
1855 		goto out;
1856 
1857 	nodeIsDGPU = topology_is_dgpu(get_device_id_by_gpu_id(gpu_id));
1858 
	/* Add LDS */
1860 	if (i < NumBanks &&
1861 		fmm_get_aperture_base_and_limit(FMM_LDS, gpu_id,
1862 				&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
1863 		MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_LDS;
1864 		MemoryProperties[i].SizeInBytes = node[NodeId].node.LDSSizeInKB * 1024;
1865 		i++;
1866 	}
1867 
	/* Add Local memory - HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE.
	 * For dGPUs the topology node already contains Local Memory, which is
	 * added by the for loop above.
	 */
1872 	if (!nodeIsDGPU && i < NumBanks && node[NodeId].node.LocalMemSize > 0 &&
1873 		fmm_get_aperture_base_and_limit(FMM_GPUVM, gpu_id,
1874 				&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
1875 		MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
1876 		MemoryProperties[i].SizeInBytes = node[NodeId].node.LocalMemSize;
1877 		i++;
1878 	}
1879 
1880 	/* Add SCRATCH */
1881 	if (i < NumBanks &&
1882 		fmm_get_aperture_base_and_limit(FMM_SCRATCH, gpu_id,
1883 				&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
1884 		MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_SCRATCH;
1885 		MemoryProperties[i].SizeInBytes = (aperture_limit - MemoryProperties[i].VirtualBaseAddress) + 1;
1886 		i++;
1887 	}
1888 
1889 	/* On dGPUs add SVM aperture */
1890 	if (nodeIsDGPU && i < NumBanks &&
1891 	    fmm_get_aperture_base_and_limit(
1892 		    FMM_SVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress,
1893 		    &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
1894 		MemoryProperties[i].HeapType = HSA_HEAPTYPE_DEVICE_SVM;
1895 		MemoryProperties[i].SizeInBytes = (aperture_limit - MemoryProperties[i].VirtualBaseAddress) + 1;
1896 		i++;
1897 	}
1898 
1899 out:
1900 	pthread_mutex_unlock(&hsakmt_mutex);
1901 	return err;
1902 }
1903 
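/* Copy up to NumCaches cache descriptors for NodeId from the snapshot. The
 * ProcessorId argument is not used by this implementation.
 */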
1904 HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties(HSAuint32 NodeId,
1905 						     HSAuint32 ProcessorId,
1906 						     HSAuint32 NumCaches,
1907 						     HsaCacheProperties *CacheProperties)
1908 {
1909 	HSAKMT_STATUS err;
1910 	uint32_t i;
1911 
1912 	if (!CacheProperties)
1913 		return HSAKMT_STATUS_INVALID_PARAMETER;
1914 
1915 	CHECK_KFD_OPEN();
1916 	pthread_mutex_lock(&hsakmt_mutex);
1917 
1918 	/* KFD ADD page 18, snapshot protocol violation */
1919 	if (!_system) {
1920 		err = HSAKMT_STATUS_INVALID_NODE_UNIT;
1921 		assert(_system);
1922 		goto out;
1923 	}
1924 
1925 	if (NodeId >= _system->NumNodes || NumCaches > node[NodeId].node.NumCaches) {
1926 		err = HSAKMT_STATUS_INVALID_PARAMETER;
1927 		goto out;
1928 	}
1929 
1930 	for (i = 0; i < MIN(node[NodeId].node.NumCaches, NumCaches); i++) {
1931 		assert(node[NodeId].cache);
1932 		CacheProperties[i] = node[NodeId].cache[i];
1933 	}
1934 
1935 	err = HSAKMT_STATUS_SUCCESS;
1936 
1937 out:
1938 	pthread_mutex_unlock(&hsakmt_mutex);
1939 	return err;
1940 }
1941 
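/* Copy up to NumIoLinks IO link descriptors for NodeId from the snapshot. */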
1942 HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId,
1943 						      HSAuint32 NumIoLinks,
1944 						      HsaIoLinkProperties *IoLinkProperties)
1945 {
1946 	HSAKMT_STATUS err;
1947 	uint32_t i;
1948 
1949 	if (!IoLinkProperties)
1950 		return HSAKMT_STATUS_INVALID_PARAMETER;
1951 
1952 	CHECK_KFD_OPEN();
1953 
1954 	pthread_mutex_lock(&hsakmt_mutex);
1955 
1956 	/* KFD ADD page 18, snapshot protocol violation */
1957 	if (!_system) {
1958 		err = HSAKMT_STATUS_INVALID_NODE_UNIT;
1959 		assert(_system);
1960 		goto out;
1961 	}
1962 
1963 	if (NodeId >= _system->NumNodes || NumIoLinks > node[NodeId].node.NumIOLinks) {
1964 		err = HSAKMT_STATUS_INVALID_PARAMETER;
1965 		goto out;
1966 	}
1967 
1968 	for (i = 0; i < MIN(node[NodeId].node.NumIOLinks, NumIoLinks); i++) {
1969 		assert(node[NodeId].link);
1970 		IoLinkProperties[i] = node[NodeId].link[i];
1971 	}
1972 
1973 	err = HSAKMT_STATUS_SUCCESS;
1974 
1975 out:
1976 	pthread_mutex_unlock(&hsakmt_mutex);
1977 	return err;
1978 }
1979 
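/* Return the device ID of the given topology node, or 0 if there is no
 * snapshot or the node is out of range.
 */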
1980 uint16_t get_device_id_by_node(HSAuint32 node_id)
1981 {
1982 	if (!node || !_system || _system->NumNodes <= node_id)
1983 		return 0;
1984 
1985 	return node[node_id].node.DeviceId;
1986 }
1987 
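/* Return the device ID of the node that owns gpu_id, or 0 if no such node
 * exists in the snapshot.
 */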
1988 uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id)
1989 {
1990 	unsigned int i;
1991 
1992 	if (!node || !_system)
1993 		return 0;
1994 
1995 	for (i = 0; i < _system->NumNodes; i++) {
1996 		if (node[i].gpu_id == gpu_id)
1997 			return node[i].node.DeviceId;
1998 	}
1999 
2000 	return 0;
2001 }
2002 
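/* Translate an array of NumberOfNodes node IDs into a freshly allocated array
 * of gpu_ids. On success the caller owns *gpu_id_array and must free() it; on
 * failure it is freed here and set to NULL.
 */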
2003 HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array,
2004 		uint32_t NumberOfNodes, uint32_t *NodeArray)
2005 {
2006 	HSAKMT_STATUS ret;
2007 	unsigned int i;
2008 
2009 	if (NumberOfNodes == 0 || !NodeArray || !gpu_id_array)
2010 		return HSAKMT_STATUS_INVALID_PARAMETER;
2011 
2012 	/* Translate Node IDs to gpu_ids */
2013 	*gpu_id_array = malloc(NumberOfNodes * sizeof(uint32_t));
2014 	if (!(*gpu_id_array))
2015 		return HSAKMT_STATUS_NO_MEMORY;
	for (i = 0; i < NumberOfNodes; i++) {
		ret = validate_nodeid(NodeArray[i], *gpu_id_array + i);
		if (ret != HSAKMT_STATUS_SUCCESS) {
			free(*gpu_id_array);
			*gpu_id_array = NULL;
			break;
		}
	}
2023 
2024 	return ret;
2025 }
2026