1 /*
2 * Copyright © 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including
13 * the next paragraph) shall be included in all copies or substantial
14 * portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26 #define _GNU_SOURCE
27 #include <assert.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <dirent.h>
31 #if !defined(__FreeBSD__) && !defined(__DragonFly__)
32 #include <malloc.h>
33 #endif
34 #include <string.h>
35 #include <unistd.h>
36 #include <ctype.h>
37 #include <sched.h>
38 #include <pci/pci.h>
39 #ifdef __FreeBSD__
40 #include <sys/cpuset.h>
41 #endif
42
43 #include "libhsakmt.h"
44 #include "fmm.h"
45
46 /* Number of memory banks added by thunk on top of topology */
47 #define NUM_OF_IGPU_HEAPS 3
48 #define NUM_OF_DGPU_HEAPS 3
49 /* SYSFS related */
50 #define KFD_SYSFS_PATH_GENERATION_ID "/sys/devices/virtual/kfd/kfd/topology/generation_id"
51 #define KFD_SYSFS_PATH_SYSTEM_PROPERTIES "/sys/devices/virtual/kfd/kfd/topology/system_properties"
52 #define KFD_SYSFS_PATH_NODES "/sys/devices/virtual/kfd/kfd/topology/nodes"
53 #define PROC_CPUINFO_PATH "/proc/cpuinfo"
54
/* One entry of the cached topology snapshot: a single KFD topology node
 * (CPU, GPU or APU) plus the variable-length property arrays read for it.
 * The arrays are heap-allocated and released by free_node().
 */
typedef struct {
	uint32_t gpu_id;            /* KFD gpu_id of this node -- presumably 0 for CPU-only nodes; confirm */
	HsaNodeProperties node;     /* fixed-size node properties */
	HsaMemoryProperties *mem;   /* node->NumBanks elements */
	HsaCacheProperties *cache;  /* cache properties array */
	HsaIoLinkProperties *link;  /* IO link properties array */
} node_t;
62
/* Cached system-wide topology snapshot -- presumably filled by
 * topology_take_snapshot() (declared below); NULL until then.
 */
static HsaSystemProperties *_system = NULL;
/* Array backing the snapshot, one node_t per topology node */
static node_t *node = NULL;
/* Non-zero when running under Valgrind; cpuid-derived sibling data is
 * unreliable there (see find_cpu_cache_siblings)
 */
static int is_valgrind;

/* Index into supported_processor_vendor_name[] for the host CPU */
static int processor_vendor;
/* Supported System Vendors */
enum SUPPORTED_PROCESSOR_VENDORS {
	GENUINE_INTEL = 0,
	AUTHENTIC_AMD
};
/* Adding newline to make the search easier */
static const char *supported_processor_vendor_name[] = {
	"GenuineIntel\n",
	"AuthenticAMD\n"
};
78
79 static HSAKMT_STATUS topology_take_snapshot(void);
80 static HSAKMT_STATUS topology_drop_snapshot(void);
81
/* Static lookup table mapping PCI device IDs to GFXIP version, CAL name and
 * ASIC family. Consulted by find_hsa_gfxip_device(); a device ID absent from
 * this table is treated as a CPU node by topology_sysfs_get_node_props().
 */
static struct hsa_gfxip_table {
	uint16_t device_id;	// Device ID
	unsigned char major;	// GFXIP Major engine version
	unsigned char minor;	// GFXIP Minor engine version
	unsigned char stepping;	// GFXIP Stepping info
	unsigned char is_dgpu;	// Predicate for dGPU devices
	const char *amd_name;	// CALName of the device
	enum asic_family_type asic_family;
} gfxip_lookup_table[] = {
	/* Kaveri Family */
	{ 0x1304, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x1305, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x1306, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x1307, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x1309, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x130A, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x130B, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x130C, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x130D, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x130E, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x130F, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x1310, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x1311, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x1312, 7, 0, 0, 0, "Spooky", CHIP_KAVERI },
	{ 0x1313, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x1315, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x1316, 7, 0, 0, 0, "Spooky", CHIP_KAVERI },
	{ 0x1317, 7, 0, 0, 0, "Spooky", CHIP_KAVERI },
	{ 0x1318, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x131B, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x131C, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	{ 0x131D, 7, 0, 0, 0, "Spectre", CHIP_KAVERI },
	/* Hawaii Family */
	{ 0x67A0, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67A1, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67A2, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67A8, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67A9, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67AA, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67B0, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67B1, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67B8, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67B9, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67BA, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	{ 0x67BE, 7, 0, 1, 1, "Hawaii", CHIP_HAWAII },
	/* Carrizo Family */
	{ 0x9870, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
	{ 0x9874, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
	{ 0x9875, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
	{ 0x9876, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
	{ 0x9877, 8, 0, 1, 0, "Carrizo", CHIP_CARRIZO },
	/* Tonga Family */
	{ 0x6920, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
	{ 0x6921, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
	{ 0x6928, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
	{ 0x6929, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
	{ 0x692B, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
	{ 0x692F, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
	{ 0x6930, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
	{ 0x6938, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
	{ 0x6939, 8, 0, 2, 1, "Tonga", CHIP_TONGA },
	/* Fiji */
	{ 0x7300, 8, 0, 3, 1, "Fiji", CHIP_FIJI },
	{ 0x730F, 8, 0, 3, 1, "Fiji", CHIP_FIJI },
	/* Polaris10 */
	{ 0x67C0, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67C1, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67C2, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67C4, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67C7, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67C8, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67C9, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67CA, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67CC, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67CF, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67D0, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	{ 0x67DF, 8, 0, 3, 1, "Polaris10", CHIP_POLARIS10 },
	/* Polaris11 */
	{ 0x67E0, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
	{ 0x67E1, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
	{ 0x67E3, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
	{ 0x67E7, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
	{ 0x67E8, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
	{ 0x67E9, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
	{ 0x67EB, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
	{ 0x67EF, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
	{ 0x67FF, 8, 0, 3, 1, "Polaris11", CHIP_POLARIS11 },
	/* Vega10 */
	{ 0x6860, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
	{ 0x6861, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
	{ 0x6862, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
	{ 0x6863, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
	{ 0x6864, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
	{ 0x6867, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
	{ 0x6868, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
	{ 0x686C, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
	{ 0x687F, 9, 0, 0, 1, "Vega10", CHIP_VEGA10 },
	/* Vega12 */
	/* NOTE(review): the Vega12 entries reuse CHIP_VEGA10 as their
	 * asic_family (there is no CHIP_VEGA12 used in this table) --
	 * confirm this is intentional.
	 */
	{ 0x69A0, 9, 0, 4, 1, "Vega12", CHIP_VEGA10 },
	{ 0x69A1, 9, 0, 4, 1, "Vega12", CHIP_VEGA10 },
	{ 0x69A3, 9, 0, 4, 1, "Vega12", CHIP_VEGA10 },
	{ 0x69Af, 9, 0, 4, 1, "Vega12", CHIP_VEGA10 },
	/* Raven */
	{ 0x15DD, 9, 0, 2, 0, "Raven", CHIP_RAVEN },
	/* Vega20 */
	{ 0x66A0, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
	{ 0x66A1, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
	{ 0x66A2, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
	{ 0x66A3, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
	{ 0x66A7, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
	{ 0x66AF, 9, 0, 6, 1, "Vega20", CHIP_VEGA20 },
};
194
/* Cache type encoding of the cpuid Deterministic Cache Parameters leaf
 * (EAX bits 4:0). DATA and INST double as bit masks since
 * UNIFIED == DATA | INST; see cpuid_get_cpu_cache_info().
 */
enum cache_type {
	CACHE_TYPE_NULL = 0,
	CACHE_TYPE_DATA = 1,
	CACHE_TYPE_INST = 2,
	CACHE_TYPE_UNIFIED = 3
};

/* One cache leaf: HSA cache properties plus the sharing count needed later
 * to compute sibling maps in find_cpu_cache_siblings().
 */
typedef struct cacheinfo {
	HsaCacheProperties hsa_cache_prop;
	uint32_t num_threads_sharing; /* how many CPUs share this cache */
} cacheinfo_t;

/* CPU cache table for all CPUs on the system. Each entry has the relative CPU
 * info and caches connected to that CPU.
 */
typedef struct cpu_cacheinfo {
	uint32_t len; /* length of the table -> number of online procs */
	uint32_t num_caches; /* number of caches connected to this cpu */
	uint32_t num_duplicated_caches; /* to count caches being shared */
	uint32_t apicid; /* this cpu's apic id */
	uint32_t max_num_apicid; /* max number of addressable IDs */
	cacheinfo_t *cache_info; /* an array for cache information */
} cpu_cacheinfo_t;
218
/* Deterministic Cache Parameters Leaf in cpuid.
 * The hardware reports most counts biased by -1; consumers add 1 back
 * (see cpuid_get_cpu_cache_info()).
 */
union _cpuid_leaf_eax { /* Register EAX */
	struct {
		enum cache_type type:5;          /* cache type; CACHE_TYPE_NULL ends the leaf list */
		uint32_t level:3;                /* cache level */
		uint32_t is_self_initializing:1;
		uint32_t is_fully_associative:1;
		uint32_t reserved:4;
		uint32_t num_threads_sharing:12; /* sharing threads - 1 */
		uint32_t num_cores_on_die:6;
	} split;
	uint32_t full;                           /* raw register value */
};

union _cpuid_leaf_ebx { /* Register EBX */
	struct {
		uint32_t coherency_line_size:12;     /* line size - 1 */
		uint32_t physical_line_partition:10; /* partitions - 1 */
		uint32_t ways_of_associativity:10;   /* ways - 1 */
	} split;
	uint32_t full;                               /* raw register value */
};
241
242 static void
free_node(node_t * n)243 free_node(node_t *n)
244 {
245 assert(n);
246
247 if (!n)
248 return;
249
250 if ((n)->mem)
251 free((n)->mem);
252 if ((n)->cache)
253 free((n)->cache);
254 if ((n)->link)
255 free((n)->link);
256 }
257
/* free_nodes - Release an array of nodes and the array itself.
 * @temp_nodes - array of @size node_t entries (may be NULL)
 * @size - number of entries in @temp_nodes
 */
static void free_nodes(node_t *temp_nodes, int size)
{
	int idx;

	if (!temp_nodes)
		return;

	for (idx = 0; idx < size; idx++)
		free_node(&temp_nodes[idx]);
	free(temp_nodes);
}
268
/* num_subdirs - find the number of sub-directories in the specified path
 * @dirpath - directory path to find sub-directories underneath
 * @prefix - only count sub-directory names starting with prefix.
 *           Use blank string, "", to count all.
 * Return - number of sub-directories; 0 if @dirpath cannot be opened.
 * Note: every directory entry except "." and ".." is counted, regardless
 * of its file type.
 */
static int num_subdirs(char *dirpath, char *prefix)
{
	struct dirent *entry;
	size_t prefix_len = strlen(prefix);
	int count = 0;
	DIR *dirp = opendir(dirpath);

	if (!dirp)
		return 0;

	while ((entry = readdir(dirp)) != NULL) {
		/* Skip the self and parent links */
		if (strcmp(entry->d_name, ".") == 0 ||
		    strcmp(entry->d_name, "..") == 0)
			continue;
		/* An empty prefix matches everything */
		if (prefix_len &&
		    strncmp(entry->d_name, prefix, prefix_len) != 0)
			continue;
		count++;
	}
	closedir(dirp);

	return count;
}
298
299 #if defined(__x86_64__) || defined(__i386__)
/* cpuid instruction returns processor identification and feature information
 * to the EAX, EBX, ECX, and EDX registers, as determined by input entered in
 * EAX (in some cases, ECX as well).
 * All four parameters are in/out: *eax and *ecx supply the leaf/sub-leaf
 * inputs, and all four receive the resulting register values.
 */
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
			 uint32_t *edx)
{
	/* "0"/"2" tie the inputs to the same registers as outputs 0 (EAX)
	 * and 2 (ECX); "memory" prevents the compiler from reordering
	 * surrounding loads/stores around the instruction.
	 */
	__asm__ __volatile__(
		"cpuid;"
		: "=a" (*eax),
		  "=b" (*ebx),
		  "=c" (*ecx),
		  "=d" (*edx)
		: "0" (*eax), "2" (*ecx)
		: "memory"
	);
}
317
318 /* In cases ECX is also used as an input for cpuid, i.e. cache leaf */
cpuid_count(uint32_t op,int count,uint32_t * eax,uint32_t * ebx,uint32_t * ecx,uint32_t * edx)319 static void cpuid_count(uint32_t op, int count, uint32_t *eax, uint32_t *ebx,
320 uint32_t *ecx, uint32_t *edx)
321 {
322 *eax = op;
323 *ecx = count;
324 cpuid(eax, ebx, ecx, edx);
325 }
326
/* Lock current process to the specified processor
 * @processor - logical CPU index to pin this process to
 * Return - 0 on success, -1 on failure (from the affinity syscall)
 */
static int lock_to_processor(int processor)
{
#ifdef __FreeBSD__
	cpuset_t set;

	CPU_ZERO(&set);
	CPU_SET(processor, &set);
	return cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(set), &set);
#else
	cpu_set_t cpuset;

	/* CPU_ZERO is the canonical way to clear a cpu_set_t; it has the
	 * same effect as the memset of the whole structure it replaces,
	 * and matches the FreeBSD branch above.
	 */
	CPU_ZERO(&cpuset);
	CPU_SET(processor, &cpuset);
	/* 0: this process */
	return sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
#endif
}
345
/* Get count's order of 2. In other words, 2^rtn_value = count
 * When count is not an order of 2, round it up to the closest.
 * @count - value to take the ceiling log2 of
 * Return - ceil(log2(count)); -1 when count == 0 (no bit set).
 */
static int get_count_order(unsigned int count)
{
	int bit;

	/* Find the highest set bit. Use an unsigned constant for the
	 * shift: (1 << 31) on a 32-bit int is signed-overflow UB.
	 */
	for (bit = 31; bit >= 0; bit--) {
		if (count >= (1u << bit))
			break;
	}
	/* Not a power of two -> round up to the next order */
	if (count & (count - 1))
		++bit;

	return bit;
}
364
365 /* cpuid_find_num_cache_leaves - Use cpuid instruction to find out how many
366 * cache leaves the CPU has.
367 * @op - cpuid opcode to get cache information
368 * Return - the number of cache leaves
369 */
cpuid_find_num_cache_leaves(uint32_t op)370 static int cpuid_find_num_cache_leaves(uint32_t op)
371 {
372 union _cpuid_leaf_eax eax;
373 union _cpuid_leaf_ebx ebx;
374 unsigned int ecx;
375 unsigned int edx;
376 int idx = -1;
377
378 do {
379 ++idx;
380 cpuid_count(op, idx, &eax.full, &ebx.full, &ecx, &edx);
381 /* Modern systems have cache levels up to 3. */
382 } while (eax.split.type != CACHE_TYPE_NULL && idx < 4);
383 return idx;
384 }
385
386 /* cpuid_get_cpu_cache_info - Use cpuid instruction to get cache information
387 * @op - cpuid opcode to get cache information
388 * @cpu_ci - this parameter is an input and also an output.
389 * [IN] cpu_ci->num_caches: the number of caches of this cpu
390 * [OUT] cpu_ci->cache_info: to store cache info collected
391 */
cpuid_get_cpu_cache_info(uint32_t op,cpu_cacheinfo_t * cpu_ci)392 static void cpuid_get_cpu_cache_info(uint32_t op, cpu_cacheinfo_t *cpu_ci)
393 {
394 union _cpuid_leaf_eax eax;
395 union _cpuid_leaf_ebx ebx;
396 uint32_t ecx;
397 uint32_t edx;
398 uint32_t index;
399 cacheinfo_t *this_leaf;
400
401 for (index = 0; index < cpu_ci->num_caches; index++) {
402 cpuid_count(op, index, &eax.full, &ebx.full, &ecx, &edx);
403 this_leaf = cpu_ci->cache_info + index;
404 this_leaf->hsa_cache_prop.ProcessorIdLow = cpu_ci->apicid;
405 this_leaf->num_threads_sharing =
406 eax.split.num_threads_sharing + 1;
407 this_leaf->hsa_cache_prop.CacheLevel = eax.split.level;
408 this_leaf->hsa_cache_prop.CacheType.ui32.CPU = 1;
409 if (eax.split.type & CACHE_TYPE_DATA)
410 this_leaf->hsa_cache_prop.CacheType.ui32.Data = 1;
411 if (eax.split.type & CACHE_TYPE_INST)
412 this_leaf->hsa_cache_prop.CacheType.ui32.Instruction = 1;
413 this_leaf->hsa_cache_prop.CacheLineSize =
414 ebx.split.coherency_line_size + 1;
415 this_leaf->hsa_cache_prop.CacheAssociativity =
416 ebx.split.ways_of_associativity + 1;
417 this_leaf->hsa_cache_prop.CacheLinesPerTag =
418 ebx.split.physical_line_partition + 1;
419 this_leaf->hsa_cache_prop.CacheSize = (ecx + 1) *
420 (ebx.split.coherency_line_size + 1) *
421 (ebx.split.physical_line_partition + 1) *
422 (ebx.split.ways_of_associativity + 1);
423 }
424 }
425
/* find_cpu_cache_siblings - In the cache list, some caches may be listed more
 * than once if they are shared by multiple CPUs. Identify the cache's CPU
 * siblings, record it to SiblingMap[], then remove the duplicated cache by
 * changing the cache size to 0.
 * @cpu_ci_list - [IN/OUT] array of cpu_ci_list->len per-CPU cache tables;
 *                SiblingMap, CacheSize and num_duplicated_caches are
 *                updated in place.
 */
static void find_cpu_cache_siblings(cpu_cacheinfo_t *cpu_ci_list)
{
	cacheinfo_t *this_leaf, *leaf2;
	uint32_t n, j, idx_msb, apicid1, apicid2;
	cpu_cacheinfo_t *this_cpu, *cpu2;
	uint32_t index;

	/* FixMe: cpuid under Valgrind doesn't return data from the processor we set
	 * affinity to. We can't use that data to calculate siblings.
	 */
	if (is_valgrind)
		return;

	for (n = 0; n < cpu_ci_list->len; n++) {
		this_cpu = cpu_ci_list + n;
		for (index = 0; index < this_cpu->num_caches; index++) {
			this_leaf = this_cpu->cache_info + index;
			/* CacheSize 0 means an invalid cache */
			if (!this_leaf->hsa_cache_prop.CacheSize)
				continue;
			if (this_leaf->num_threads_sharing == 1) // no siblings
				continue;
			/* CPUs whose apicid matches above this bit position
			 * share the cache.
			 * NOTE(review): leaf `index` of cpu2 is assumed to
			 * describe the same cache level as leaf `index` of
			 * this_cpu -- confirm for asymmetric topologies.
			 */
			idx_msb = get_count_order(this_leaf->num_threads_sharing);
			for (j = n + 1; j < cpu_ci_list->len; j++) {
				cpu2 = cpu_ci_list + j;
				leaf2 = cpu2->cache_info + index;
				apicid1 = this_leaf->hsa_cache_prop.ProcessorIdLow;
				apicid2 = leaf2->hsa_cache_prop.ProcessorIdLow;
				if ((apicid2 >> idx_msb) != (apicid1 >> idx_msb))
					continue;
				/* A sibling leaf is found. Cache properties
				 * use ProcIdLow as offset to represent siblings
				 * in SiblingMap, so keep the lower apicid and
				 * delete the other by changing CacheSize to 0.
				 */
				if (apicid1 < apicid2) {
					this_leaf->hsa_cache_prop.SiblingMap[0] = 1;
					this_leaf->hsa_cache_prop.SiblingMap[apicid2 - apicid1] = 1;
					leaf2->hsa_cache_prop.CacheSize = 0;
					cpu2->num_duplicated_caches++;
				} else {
					leaf2->hsa_cache_prop.SiblingMap[0] = 1;
					leaf2->hsa_cache_prop.SiblingMap[apicid1 - apicid2] = 1;
					this_leaf->hsa_cache_prop.CacheSize = 0;
					this_cpu->num_duplicated_caches++;
				}
			}
		}
	}
}
481 #endif /* X86 platform */
482
topology_sysfs_get_generation(uint32_t * gen)483 static HSAKMT_STATUS topology_sysfs_get_generation(uint32_t *gen)
484 {
485 FILE *fd;
486 HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
487
488 assert(gen);
489 fd = fopen(KFD_SYSFS_PATH_GENERATION_ID, "r");
490 if (!fd)
491 return HSAKMT_STATUS_ERROR;
492 if (fscanf(fd, "%ul", gen) != 1) {
493 ret = HSAKMT_STATUS_ERROR;
494 goto err;
495 }
496
497 err:
498 fclose(fd);
499 return ret;
500 }
501
topology_sysfs_get_system_props(HsaSystemProperties * props)502 HSAKMT_STATUS topology_sysfs_get_system_props(HsaSystemProperties *props)
503 {
504 FILE *fd;
505 char *read_buf, *p;
506 char prop_name[256];
507 unsigned long long prop_val;
508 uint32_t prog;
509 int read_size;
510 HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
511
512
513 assert(props);
514 fd = fopen(KFD_SYSFS_PATH_SYSTEM_PROPERTIES, "r");
515 if (!fd)
516 return HSAKMT_STATUS_ERROR;
517
518 read_buf = malloc(PAGE_SIZE);
519 if (!read_buf) {
520 ret = HSAKMT_STATUS_NO_MEMORY;
521 goto err1;
522 }
523
524 read_size = fread(read_buf, 1, PAGE_SIZE, fd);
525 if (read_size <= 0) {
526 ret = HSAKMT_STATUS_ERROR;
527 goto err2;
528 }
529
530 /* Since we're using the buffer as a string, we make sure the string terminates */
531 if (read_size >= PAGE_SIZE)
532 read_size = PAGE_SIZE - 1;
533 read_buf[read_size] = 0;
534
535 /* Read the system properties */
536 prog = 0;
537 p = read_buf;
538 while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
539 if (strcmp(prop_name, "platform_oem") == 0)
540 props->PlatformOem = (uint32_t)prop_val;
541 else if (strcmp(prop_name, "platform_id") == 0)
542 props->PlatformId = (uint32_t)prop_val;
543 else if (strcmp(prop_name, "platform_rev") == 0)
544 props->PlatformRev = (uint32_t)prop_val;
545 }
546
547 /*
548 * Discover the number of nodes:
549 * Assuming that inside nodes folder there are only folders
550 * which represent the node numbers
551 */
552 props->NumNodes = num_subdirs(KFD_SYSFS_PATH_NODES, "");
553
554 err2:
555 free(read_buf);
556 err1:
557 fclose(fd);
558 return ret;
559 }
560
topology_sysfs_get_gpu_id(uint32_t node_id,uint32_t * gpu_id)561 HSAKMT_STATUS topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id)
562 {
563 FILE *fd;
564 char path[256];
565 HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
566
567 assert(gpu_id);
568 snprintf(path, 256, "%s/%d/gpu_id", KFD_SYSFS_PATH_NODES, node_id);
569 fd = fopen(path, "r");
570 if (!fd)
571 return HSAKMT_STATUS_ERROR;
572 if (fscanf(fd, "%ul", gpu_id) != 1)
573 ret = HSAKMT_STATUS_ERROR;
574 fclose(fd);
575
576 return ret;
577 }
578
/* find_hsa_gfxip_device - Linear search of gfxip_lookup_table by PCI
 * device ID.
 * @device_id - PCI device ID of the GPU
 * Return - pointer to the matching table entry, or NULL if the device is
 *          not listed (i.e. not a known GPU).
 */
static const struct hsa_gfxip_table *find_hsa_gfxip_device(uint16_t device_id)
{
	const struct hsa_gfxip_table *entry = gfxip_lookup_table;
	const struct hsa_gfxip_table *end = gfxip_lookup_table +
		sizeof(gfxip_lookup_table) / sizeof(gfxip_lookup_table[0]);

	for (; entry != end; entry++) {
		if (entry->device_id == device_id)
			return entry;
	}
	return NULL;
}
590
topology_get_asic_family(uint16_t device_id,enum asic_family_type * asic)591 HSAKMT_STATUS topology_get_asic_family(uint16_t device_id,
592 enum asic_family_type *asic)
593 {
594 const struct hsa_gfxip_table *hsa_gfxip =
595 find_hsa_gfxip_device(device_id);
596
597 if (!hsa_gfxip)
598 return HSAKMT_STATUS_INVALID_PARAMETER;
599
600 *asic = hsa_gfxip->asic_family;
601 return HSAKMT_STATUS_SUCCESS;
602 }
603
/* topology_is_dgpu - Check whether a device ID belongs to a discrete GPU.
 * @device_id - PCI device ID of the GPU
 * Return - true for a known dGPU, false otherwise.
 * Side effect: updates the global is_dgpu flag with the result.
 */
bool topology_is_dgpu(uint16_t device_id)
{
	const struct hsa_gfxip_table *entry = find_hsa_gfxip_device(device_id);
	bool dgpu = (entry && entry->is_dgpu);

	is_dgpu = dgpu;
	return dgpu;
}
616
/* topology_is_svm_needed - Decide whether SVM is required for a device:
 * all dGPUs need it, as does any ASIC from Vega10 onwards.
 * @device_id - PCI device ID of the GPU
 * Return - true if SVM is needed, false otherwise.
 * Note: topology_is_dgpu() also updates the global is_dgpu flag.
 */
bool topology_is_svm_needed(uint16_t device_id)
{
	const struct hsa_gfxip_table *entry;

	if (topology_is_dgpu(device_id))
		return true;

	entry = find_hsa_gfxip_device(device_id);
	return entry && entry->asic_family >= CHIP_VEGA10;
}
631
topology_get_cpu_model_name(HsaNodeProperties * props,bool is_apu)632 static HSAKMT_STATUS topology_get_cpu_model_name(HsaNodeProperties *props,
633 bool is_apu)
634 {
635 FILE *fd;
636 char read_buf[256], cpu_model_name[HSA_PUBLIC_NAME_SIZE];
637 const char *p;
638 uint32_t i = 0, apic_id = 0;
639
640 if (!props)
641 return HSAKMT_STATUS_INVALID_PARAMETER;
642
643 fd = fopen(PROC_CPUINFO_PATH, "r");
644 if (!fd) {
645 pr_err("Failed to open [%s]. Unable to get CPU Model Name",
646 PROC_CPUINFO_PATH);
647 return HSAKMT_STATUS_ERROR;
648 }
649
650 while (fgets(read_buf, sizeof(read_buf), fd)) {
651 /* Get the model name first, in case matching
652 * apic IDs are also present in the file
653 */
654 if (!strncmp("model name", read_buf, sizeof("model name") - 1)) {
655 p = strrchr(read_buf, ':');
656 if (!p)
657 goto err;
658
659 p++; // remove separator ':'
660 for (; isspace(*p); p++)
661 ; /* remove white space */
662
663 /* Extract model name from string */
664 for (i = 0; i < sizeof(cpu_model_name) - 1 && p[i] != '\n'; i++)
665 cpu_model_name[i] = p[i];
666 cpu_model_name[i] = '\0';
667 }
668
669 if (!strncmp("apicid", read_buf, sizeof("apicid") - 1)) {
670 p = strrchr(read_buf, ':');
671 if (!p)
672 goto err;
673
674 p++; // remove separator ':'
675 for (; isspace(*p); p++)
676 ; /* remove white space */
677
678 /* Extract apic_id from remaining chars */
679 apic_id = atoi(p);
680
681 /* Set CPU model name only if corresponding apic id */
682 if (props->CComputeIdLo == apic_id) {
683 /* Retrieve the CAL name of CPU node */
684 if (!is_apu)
685 strncpy((char *)props->AMDName, cpu_model_name, sizeof(props->AMDName));
686 /* Convert from UTF8 to UTF16 */
687 for (i = 0; cpu_model_name[i] != '\0' && i < HSA_PUBLIC_NAME_SIZE - 1; i++)
688 props->MarketingName[i] = cpu_model_name[i];
689 props->MarketingName[i] = '\0';
690 }
691 }
692 }
693 fclose(fd);
694 return HSAKMT_STATUS_SUCCESS;
695 err:
696 fclose(fd);
697 return HSAKMT_STATUS_ERROR;
698 }
699
topology_search_processor_vendor(const char * processor_name)700 static int topology_search_processor_vendor(const char *processor_name)
701 {
702 unsigned int i;
703
704 for (i = 0; i < ARRAY_LEN(supported_processor_vendor_name); i++) {
705 if (!strcmp(processor_name, supported_processor_vendor_name[i]))
706 return i;
707 }
708 return -1;
709 }
710
711 /* topology_set_processor_vendor - Parse /proc/cpuinfo and
712 * to find processor vendor and set global variable processor_vendor
713 *
714 * cat /proc/cpuinfo format is - "token : Value"
715 * where token = "vendor_id" and
716 * Value = indicates System Vendor
717 */
topology_set_processor_vendor(void)718 static void topology_set_processor_vendor(void)
719 {
720 FILE *fd;
721 char read_buf[256];
722 const char *p;
723
724 fd = fopen(PROC_CPUINFO_PATH, "r");
725 if (!fd) {
726 pr_err("Failed to open [%s]. Setting Processor Vendor to %s",
727 PROC_CPUINFO_PATH, supported_processor_vendor_name[GENUINE_INTEL]);
728 processor_vendor = GENUINE_INTEL;
729 return;
730 }
731
732 while (fgets(read_buf, sizeof(read_buf), fd)) {
733 if (!strncmp("vendor_id", read_buf, sizeof("vendor_id") - 1)) {
734 p = strrchr(read_buf, ':');
735 p++; // remove separator ':'
736 for (; *p && isspace(*p); p++)
737 ; /* remove white space */
738 processor_vendor = topology_search_processor_vendor(p);
739 if (processor_vendor != -1) {
740 fclose(fd);
741 return;
742 }
743 }
744 }
745 fclose(fd);
746 pr_err("Failed to get Processor Vendor. Setting to %s",
747 supported_processor_vendor_name[GENUINE_INTEL]);
748 processor_vendor = GENUINE_INTEL;
749 }
750
/* topology_sysfs_get_node_props - Read one node's sysfs "properties" file and
 * fill in the HSA node properties, including GFXIP version and device names.
 * @node_id - index of the node under KFD_SYSFS_PATH_NODES
 * @props - [OUT] parsed node properties
 * @gpu_id - [OUT] the node's gpu_id read from sysfs
 * @pacc - pcilib handle used to resolve a dGPU's marketing name
 * Return - HSAKMT_STATUS_SUCCESS on success, error status otherwise
 */
HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
					    HsaNodeProperties *props,
					    uint32_t *gpu_id,
					    struct pci_access *pacc)
{
	FILE *fd;
	char *read_buf, *p, *envvar, dummy;
	char prop_name[256];
	char path[256];
	unsigned long long prop_val;
	uint32_t i, prog, major, minor, step;
	int read_size;
	const struct hsa_gfxip_table *hsa_gfxip;
	char namebuf[HSA_PUBLIC_NAME_SIZE];
	const char *name;

	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;

	assert(props);
	assert(gpu_id);
	/* Retrieve the GPU ID.
	 * NOTE(review): the returned status is stored in ret but parsing
	 * continues regardless, and ret may later be overwritten -- confirm
	 * this is intended.
	 */
	ret = topology_sysfs_get_gpu_id(node_id, gpu_id);

	read_buf = malloc(PAGE_SIZE);
	if (!read_buf)
		return HSAKMT_STATUS_NO_MEMORY;

	/* Retrieve the node properties */
	snprintf(path, 256, "%s/%d/properties", KFD_SYSFS_PATH_NODES, node_id);
	fd = fopen(path, "r");
	if (!fd) {
		free(read_buf);
		return HSAKMT_STATUS_ERROR;
	}

	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
	if (read_size <= 0) {
		ret = HSAKMT_STATUS_ERROR;
		goto err;
	}

	/* Since we're using the buffer as a string, we make sure the string terminates */
	if (read_size >= PAGE_SIZE)
		read_size = PAGE_SIZE - 1;
	read_buf[read_size] = 0;

	/* Read the node properties ("name value" pairs, one per line).
	 * NOTE(review): "%s" into prop_name[256] is unbounded; a malformed
	 * sysfs file could overflow it -- "%255s" would be safer.
	 */
	prog = 0;
	p = read_buf;
	while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
		if (strcmp(prop_name, "cpu_cores_count") == 0)
			props->NumCPUCores = (uint32_t)prop_val;
		else if (strcmp(prop_name, "simd_count") == 0)
			props->NumFComputeCores = (uint32_t)prop_val;
		else if (strcmp(prop_name, "mem_banks_count") == 0)
			props->NumMemoryBanks = (uint32_t)prop_val;
		else if (strcmp(prop_name, "caches_count") == 0)
			props->NumCaches = (uint32_t)prop_val;
		else if (strcmp(prop_name, "io_links_count") == 0)
			props->NumIOLinks = (uint32_t)prop_val;
		else if (strcmp(prop_name, "cpu_core_id_base") == 0)
			props->CComputeIdLo = (uint32_t)prop_val;
		else if (strcmp(prop_name, "simd_id_base") == 0)
			props->FComputeIdLo = (uint32_t)prop_val;
		else if (strcmp(prop_name, "capability") == 0)
			props->Capability.Value = (uint32_t)prop_val;
		else if (strcmp(prop_name, "max_waves_per_simd") == 0)
			props->MaxWavesPerSIMD = (uint32_t)prop_val;
		else if (strcmp(prop_name, "lds_size_in_kb") == 0)
			props->LDSSizeInKB = (uint32_t)prop_val;
		else if (strcmp(prop_name, "gds_size_in_kb") == 0)
			props->GDSSizeInKB = (uint32_t)prop_val;
		else if (strcmp(prop_name, "wave_front_size") == 0)
			props->WaveFrontSize = (uint32_t)prop_val;
		else if (strcmp(prop_name, "array_count") == 0)
			props->NumShaderBanks = (uint32_t)prop_val;
		else if (strcmp(prop_name, "simd_arrays_per_engine") == 0)
			props->NumArrays = (uint32_t)prop_val;
		else if (strcmp(prop_name, "cu_per_simd_array") == 0)
			props->NumCUPerArray = (uint32_t)prop_val;
		else if (strcmp(prop_name, "simd_per_cu") == 0)
			props->NumSIMDPerCU = (uint32_t)prop_val;
		else if (strcmp(prop_name, "max_slots_scratch_cu") == 0)
			props->MaxSlotsScratchCU = (uint32_t)prop_val;
		else if (strcmp(prop_name, "fw_version") == 0)
			props->EngineId.Value = (uint32_t)prop_val & 0x3ff;
		else if (strcmp(prop_name, "vendor_id") == 0)
			props->VendorId = (uint32_t)prop_val;
		else if (strcmp(prop_name, "device_id") == 0)
			props->DeviceId = (uint32_t)prop_val;
		else if (strcmp(prop_name, "location_id") == 0)
			props->LocationId = (uint32_t)prop_val;
		else if (strcmp(prop_name, "max_engine_clk_fcompute") == 0)
			props->MaxEngineClockMhzFCompute = (uint32_t)prop_val;
		else if (strcmp(prop_name, "max_engine_clk_ccompute") == 0)
			props->MaxEngineClockMhzCCompute = (uint32_t)prop_val;
		else if (strcmp(prop_name, "local_mem_size") == 0)
			props->LocalMemSize = prop_val;
		else if (strcmp(prop_name, "drm_render_minor") == 0)
			props->DrmRenderMinor = (int32_t)prop_val;
		else if (strcmp(prop_name, "sdma_fw_version") == 0)
			props->uCodeEngineVersions.Value = (uint32_t)prop_val & 0x3ff;
	}

	/* A device ID present in the GFXIP table means this is a GPU/APU node */
	hsa_gfxip = find_hsa_gfxip_device(props->DeviceId);
	if (hsa_gfxip) {
		envvar = getenv("HSA_OVERRIDE_GFX_VERSION");
		if (envvar) {
			/* HSA_OVERRIDE_GFX_VERSION=major.minor.stepping */
			if ((sscanf(envvar, "%u.%u.%u%c",
					&major, &minor, &step, &dummy) != 3) ||
				(major > 63 || minor > 255 || step > 255)) {
				pr_err("HSA_OVERRIDE_GFX_VERSION %s is invalid\n",
					envvar);
				ret = HSAKMT_STATUS_ERROR;
				goto err;
			}
			props->EngineId.ui32.Major = major & 0x3f;
			props->EngineId.ui32.Minor = minor & 0xff;
			props->EngineId.ui32.Stepping = step & 0xff;
		} else {
			props->EngineId.ui32.Major = hsa_gfxip->major & 0x3f;
			props->EngineId.ui32.Minor = hsa_gfxip->minor;
			props->EngineId.ui32.Stepping = hsa_gfxip->stepping;
		}

		if (!hsa_gfxip->amd_name) {
			ret = HSAKMT_STATUS_ERROR;
			goto err;
		}

		/* Retrieve the CAL name of the node.
		 * NOTE(review): strncpy with the full buffer size may leave
		 * AMDName unterminated if the name fills the buffer -- confirm
		 * table names are always shorter than sizeof(props->AMDName).
		 */
		strncpy((char *)props->AMDName, hsa_gfxip->amd_name, sizeof(props->AMDName));
		if (props->NumCPUCores) {
			/* Is APU node */
			ret = topology_get_cpu_model_name(props, true);
			if (ret != HSAKMT_STATUS_SUCCESS) {
				pr_err("Failed to get APU Model Name from %s\n", PROC_CPUINFO_PATH);
				ret = HSAKMT_STATUS_SUCCESS; /* No hard error, continue regardless */
			}
		} else {
			/* Is dGPU Node
			 * Retrieve the marketing name of the node using pcilib,
			 * convert UTF8 to UTF16
			 */
			/* NOTE(review): pci_lookup_name() can return NULL on
			 * lookup failure, which would make name[i] dereference
			 * NULL below -- confirm and guard if needed.
			 */
			name = pci_lookup_name(pacc, namebuf, sizeof(namebuf), PCI_LOOKUP_DEVICE,
					props->VendorId, props->DeviceId);
			for (i = 0; name[i] != 0 && i < HSA_PUBLIC_NAME_SIZE - 1; i++)
				props->MarketingName[i] = name[i];
			props->MarketingName[i] = '\0';
		}
	} else {
		/* Is CPU Node */
		if (!props->NumFComputeCores || !props->DeviceId) {
			ret = topology_get_cpu_model_name(props, false);
			if (ret != HSAKMT_STATUS_SUCCESS) {
				pr_err("Failed to get CPU Model Name from %s\n", PROC_CPUINFO_PATH);
				ret = HSAKMT_STATUS_SUCCESS; /* No hard error, continue regardless */
			}
		} else {
			/* Has compute cores but an unknown device ID: neither
			 * a known GPU nor a plain CPU node.
			 */
			ret = HSAKMT_STATUS_ERROR;
			goto err;
		}
	}
	/* Every compute-capable node must have a GFX version by now */
	if (props->NumFComputeCores)
		assert(props->EngineId.ui32.Major);

err:
	free(read_buf);
	fclose(fd);
	return ret;
}
923
topology_sysfs_get_mem_props(uint32_t node_id,uint32_t mem_id,HsaMemoryProperties * props)924 static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id,
925 uint32_t mem_id,
926 HsaMemoryProperties *props)
927 {
928 FILE *fd;
929 char *read_buf, *p;
930 char prop_name[256];
931 char path[256];
932 unsigned long long prop_val;
933 uint32_t prog;
934 int read_size;
935 HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
936
937 assert(props);
938 snprintf(path, 256, "%s/%d/mem_banks/%d/properties", KFD_SYSFS_PATH_NODES, node_id, mem_id);
939 fd = fopen(path, "r");
940 if (!fd)
941 return HSAKMT_STATUS_ERROR;
942 read_buf = malloc(PAGE_SIZE);
943 if (!read_buf) {
944 ret = HSAKMT_STATUS_NO_MEMORY;
945 goto err1;
946 }
947
948 read_size = fread(read_buf, 1, PAGE_SIZE, fd);
949 if (read_size <= 0) {
950 ret = HSAKMT_STATUS_ERROR;
951 goto err2;
952 }
953
954 /* Since we're using the buffer as a string, we make sure the string terminates */
955 if (read_size >= PAGE_SIZE)
956 read_size = PAGE_SIZE - 1;
957 read_buf[read_size] = 0;
958
959 prog = 0;
960 p = read_buf;
961 while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
962 if (strcmp(prop_name, "heap_type") == 0)
963 props->HeapType = (uint32_t)prop_val;
964 else if (strcmp(prop_name, "size_in_bytes") == 0)
965 props->SizeInBytes = (uint64_t)prop_val;
966 else if (strcmp(prop_name, "flags") == 0)
967 props->Flags.MemoryProperty = (uint32_t)prop_val;
968 else if (strcmp(prop_name, "width") == 0)
969 props->Width = (uint32_t)prop_val;
970 else if (strcmp(prop_name, "mem_clk_max") == 0)
971 props->MemoryClockMax = (uint32_t)prop_val;
972 }
973
974 err2:
975 free(read_buf);
976 err1:
977 fclose(fd);
978 return ret;
979 }
980
981 #if defined(__x86_64__) || defined(__i386__)
982 /* topology_destroy_temp_cpu_cache_list - Free the memory allocated in
983 * topology_create_temp_cpu_cache_list().
984 */
topology_destroy_temp_cpu_cache_list(void * temp_cpu_ci_list)985 static void topology_destroy_temp_cpu_cache_list(void *temp_cpu_ci_list)
986 {
987 uint32_t n;
988 cpu_cacheinfo_t *p_temp_cpu_ci_list = (cpu_cacheinfo_t *)temp_cpu_ci_list;
989 cpu_cacheinfo_t *this_cpu;
990
991 if (p_temp_cpu_ci_list) {
992 for (n = 0; n < p_temp_cpu_ci_list->len; n++) {
993 this_cpu = p_temp_cpu_ci_list + n;
994 if (this_cpu->cache_info)
995 free(this_cpu->cache_info);
996 }
997 free(p_temp_cpu_ci_list);
998 }
999
1000 p_temp_cpu_ci_list = NULL;
1001 }
1002
1003 /* topology_create_temp_cpu_cache_list - Create a temporary cpu-cache list to
1004 * store cpu cache information. This list will be used to copy
1005 * cache information to each CPU node. Must call
1006 * topology_destroy_temp_cpu_cache_list to free the memory after
1007 * the information is copied.
1008 * @temp_cpu_ci_list - [OUT] temporary cpu-cache-info list to store data
1009 * Return - HSAKMT_STATUS_SUCCESS in success or error number in failure
1010 */
topology_create_temp_cpu_cache_list(void ** temp_cpu_ci_list)1011 static HSAKMT_STATUS topology_create_temp_cpu_cache_list(void **temp_cpu_ci_list)
1012 {
1013 HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
1014 void *p_temp_cpu_ci_list;
1015 int procs_online;
1016 #ifdef __FreeBSD__
1017 cpuset_t orig_cpuset;
1018 #else
1019 cpu_set_t orig_cpuset;
1020 #endif
1021 int i;
1022 uint32_t cpuid_op_cache;
1023 uint32_t eax, ebx, ecx = 0, edx; /* cpuid registers */
1024 cpu_cacheinfo_t *cpu_ci_list, *this_cpu;
1025
1026 if (!temp_cpu_ci_list) {
1027 ret = HSAKMT_STATUS_ERROR;
1028 goto exit;
1029 }
1030 *temp_cpu_ci_list = NULL;
1031
1032 procs_online = (int)sysconf(_SC_NPROCESSORS_ONLN);
1033 if (procs_online <= 0) {
1034 ret = HSAKMT_STATUS_ERROR;
1035 goto exit;
1036 }
1037
1038 p_temp_cpu_ci_list = calloc(sizeof(cpu_cacheinfo_t) * procs_online, 1);
1039 if (!p_temp_cpu_ci_list) {
1040 ret = HSAKMT_STATUS_NO_MEMORY;
1041 goto exit;
1042 }
1043
1044 cpu_ci_list = (cpu_cacheinfo_t *)p_temp_cpu_ci_list;
1045 cpu_ci_list->len = procs_online;
1046
1047 if (processor_vendor == AUTHENTIC_AMD)
1048 cpuid_op_cache = 0x8000001d;
1049 else
1050 cpuid_op_cache = 0x4;
1051
1052 /* lock_to_processor() changes the affinity. Save the current affinity
1053 * so we can restore it after cpuid is done.
1054 */
1055 CPU_ZERO(&orig_cpuset);
1056 #ifdef __FreeBSD__
1057 if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(orig_cpuset), &orig_cpuset) != 0) {
1058 #else
1059 if (sched_getaffinity(0, sizeof(cpu_set_t), &orig_cpuset) != 0) {
1060 #endif
1061 pr_err("Failed to get CPU affinity\n");
1062 free(p_temp_cpu_ci_list);
1063 ret = HSAKMT_STATUS_ERROR;
1064 goto exit;
1065 }
1066
1067 for (i = 0; i < procs_online; i++) {
1068 this_cpu = cpu_ci_list + i;
1069 lock_to_processor(i); /* so cpuid is executed in correct cpu */
1070
1071 eax = 0x1;
1072 cpuid(&eax, &ebx, &ecx, &edx);
1073 this_cpu->apicid = (ebx >> 24) & 0xff;
1074 this_cpu->max_num_apicid = (ebx >> 16) & 0x0FF;
1075 this_cpu->num_caches = cpuid_find_num_cache_leaves(cpuid_op_cache);
1076 this_cpu->num_duplicated_caches = 0;
1077 this_cpu->cache_info = calloc(
1078 sizeof(cacheinfo_t) * this_cpu->num_caches, 1);
1079 if (!this_cpu->cache_info) {
1080 ret = HSAKMT_STATUS_NO_MEMORY;
1081 goto err;
1082 }
1083 cpuid_get_cpu_cache_info(cpuid_op_cache, this_cpu);
1084 }
1085
1086 find_cpu_cache_siblings(cpu_ci_list);
1087 *temp_cpu_ci_list = p_temp_cpu_ci_list;
1088
1089 err:
1090 /* restore affinity to original */
1091 #ifdef __FreeBSD__
1092 cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(orig_cpuset), &orig_cpuset);
1093 #else
1094 sched_setaffinity(0, sizeof(cpu_set_t), &orig_cpuset);
1095 #endif
1096 exit:
1097 if (ret != HSAKMT_STATUS_SUCCESS) {
1098 pr_warn("Topology fails to create cpu cache list\n");
1099 topology_destroy_temp_cpu_cache_list(*temp_cpu_ci_list);
1100 }
1101 return ret;
1102 }
1103
1104 /* topology_get_cpu_cache_props - Read CPU cache information from the temporary
1105 * cache list and put them to the node's cache properties entry.
1106 * @tbl - the node table to fill up
1107 * @cpu_ci_list - the cpu cache information list to look up cache info
1108 * Return - HSAKMT_STATUS_SUCCESS in success or error number in failure
1109 */
static HSAKMT_STATUS topology_get_cpu_cache_props(node_t *tbl,
						  cpu_cacheinfo_t *cpu_ci_list)
{
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	/* apicid_low is the first APIC id of this node; apicid_max is derived
	 * below once the first CPU of the node is located in the list.
	 */
	uint32_t apicid_low = tbl->node.CComputeIdLo, apicid_max = 0;
	uint32_t n, cache_cnt, idx;
	cpu_cacheinfo_t *this_cpu;
	cacheinfo_t *this_leaf;

	/* CPU cache info list contains all CPUs. Find out CPUs belonging to
	 * this node and number of caches under, so we can allocate the cache
	 * properties in the node.
	 */
	tbl->node.NumCaches = 0;
	for (n = 0; n < cpu_ci_list->len; n++) {
		this_cpu = cpu_ci_list + n;
		if (this_cpu->apicid == apicid_low)
			/* found the first cpu in the node */
			apicid_max = apicid_low + this_cpu->max_num_apicid - 1;

		if ((this_cpu->apicid < apicid_low) ||
			(this_cpu->apicid > apicid_max))
			continue; /* this cpu doesn't belong to the node */
		/* caches counted on a sibling are tracked in
		 * num_duplicated_caches and excluded from the total */
		tbl->node.NumCaches +=
			this_cpu->num_caches - this_cpu->num_duplicated_caches;
	}

	/* FixMe: cpuid under Valgrind doesn't return data from the processor we set
	 * affinity to. All the data come from one specific processor. We'll report
	 * this one processor's cache and ignore others.
	 */
	if (is_valgrind) {
		this_cpu = cpu_ci_list;
		tbl->node.NumCaches = this_cpu->num_caches;
		apicid_low = apicid_max = this_cpu->apicid;
	}

	tbl->cache = calloc(
		sizeof(HsaCacheProperties) * tbl->node.NumCaches, 1);
	if (!tbl->cache) {
		ret = HSAKMT_STATUS_NO_MEMORY;
		goto exit;
	}

	/* Now fill in the information to cache properties. */
	cache_cnt = 0;
	for (n = 0; n < cpu_ci_list->len; n++) {
		this_cpu = cpu_ci_list + n;
		if ((this_cpu->apicid < apicid_low) || this_cpu->apicid > apicid_max)
			continue; /* this cpu doesn't belong to the node */
		for (idx = 0; idx < this_cpu->num_caches; idx++) {
			this_leaf = this_cpu->cache_info + idx;
			/* NOTE(review): leaves with CacheSize == 0 are skipped
			 * here — presumably duplicated/shared leaves are zeroed
			 * by cpuid_get_cpu_cache_info; confirm there.
			 */
			if (this_leaf->hsa_cache_prop.CacheSize > 0)
				memcpy(&tbl->cache[cache_cnt++], &this_leaf->hsa_cache_prop, sizeof(HsaCacheProperties));
			if (cache_cnt >= tbl->node.NumCaches)
				goto exit;
		}
	}

exit:
	return ret;
}
1172 #else /* not X86 */
/* Non-x86 stub: cpuid-based CPU cache discovery is unavailable, so there is
 * never a temporary list to free.
 */
static void topology_destroy_temp_cpu_cache_list(void *temp_cpu_ci_list)
{
}
1176
/* Non-x86 stub: report success without building a CPU cache list. Note the
 * output pointer is left untouched; callers must pre-initialize it to NULL.
 */
static HSAKMT_STATUS topology_create_temp_cpu_cache_list(void **temp_cpu_ci_list)
{
	return HSAKMT_STATUS_SUCCESS;
}
1181
/* Non-x86 stub: no CPU cache info to copy into the node's properties. */
static HSAKMT_STATUS topology_get_cpu_cache_props(node_t *tbl,
						  cpu_cacheinfo_t *cpu_ci_list)
{
	return HSAKMT_STATUS_SUCCESS;
}
1187 #endif
1188
1189 static HSAKMT_STATUS topology_sysfs_get_cache_props(uint32_t node_id,
1190 uint32_t cache_id,
1191 HsaCacheProperties *props)
1192 {
1193 FILE *fd;
1194 char *read_buf, *p;
1195 char prop_name[256];
1196 char path[256];
1197 unsigned long long prop_val;
1198 uint32_t i, prog;
1199 int read_size;
1200 HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
1201
1202 assert(props);
1203 snprintf(path, 256, "%s/%d/caches/%d/properties", KFD_SYSFS_PATH_NODES, node_id, cache_id);
1204 fd = fopen(path, "r");
1205 if (!fd)
1206 return HSAKMT_STATUS_ERROR;
1207 read_buf = malloc(PAGE_SIZE);
1208 if (!read_buf) {
1209 ret = HSAKMT_STATUS_NO_MEMORY;
1210 goto err1;
1211 }
1212
1213 read_size = fread(read_buf, 1, PAGE_SIZE, fd);
1214 if (read_size <= 0) {
1215 ret = HSAKMT_STATUS_ERROR;
1216 goto err2;
1217 }
1218
1219 /* Since we're using the buffer as a string, we make sure the string terminates */
1220 if (read_size >= PAGE_SIZE)
1221 read_size = PAGE_SIZE - 1;
1222 read_buf[read_size] = 0;
1223
1224 prog = 0;
1225 p = read_buf;
1226 while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
1227 if (strcmp(prop_name, "processor_id_low") == 0)
1228 props->ProcessorIdLow = (uint32_t)prop_val;
1229 else if (strcmp(prop_name, "level") == 0)
1230 props->CacheLevel = (uint32_t)prop_val;
1231 else if (strcmp(prop_name, "size") == 0)
1232 props->CacheSize = (uint32_t)prop_val;
1233 else if (strcmp(prop_name, "cache_line_size") == 0)
1234 props->CacheLineSize = (uint32_t)prop_val;
1235 else if (strcmp(prop_name, "cache_lines_per_tag") == 0)
1236 props->CacheLinesPerTag = (uint32_t)prop_val;
1237 else if (strcmp(prop_name, "association") == 0)
1238 props->CacheAssociativity = (uint32_t)prop_val;
1239 else if (strcmp(prop_name, "latency") == 0)
1240 props->CacheLatency = (uint32_t)prop_val;
1241 else if (strcmp(prop_name, "type") == 0)
1242 props->CacheType.Value = (uint32_t)prop_val;
1243 else if (strcmp(prop_name, "sibling_map") == 0)
1244 break;
1245 }
1246
1247 prog = 0;
1248 if ((sscanf(p, "sibling_map %n", &prog)) == 0 && prog) {
1249 i = 0;
1250 while ((i < HSA_CPU_SIBLINGS) &&
1251 (sscanf(p += prog, "%u%*[,\n]%n", &props->SiblingMap[i++], &prog) == 1))
1252 continue;
1253 }
1254
1255 err2:
1256 free(read_buf);
1257 err1:
1258 fclose(fd);
1259 return ret;
1260 }
1261
1262 static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id,
1263 uint32_t iolink_id,
1264 HsaIoLinkProperties *props)
1265 {
1266 FILE *fd;
1267 char *read_buf, *p;
1268 char prop_name[256];
1269 char path[256];
1270 unsigned long long prop_val;
1271 uint32_t prog;
1272 int read_size;
1273 HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
1274
1275 assert(props);
1276 snprintf(path, 256, "%s/%d/io_links/%d/properties", KFD_SYSFS_PATH_NODES, node_id, iolink_id);
1277 fd = fopen(path, "r");
1278 if (!fd)
1279 return HSAKMT_STATUS_ERROR;
1280 read_buf = malloc(PAGE_SIZE);
1281 if (!read_buf) {
1282 ret = HSAKMT_STATUS_NO_MEMORY;
1283 goto err1;
1284 }
1285
1286 read_size = fread(read_buf, 1, PAGE_SIZE, fd);
1287 if (read_size <= 0) {
1288 ret = HSAKMT_STATUS_ERROR;
1289 goto err2;
1290 }
1291
1292 /* Since we're using the buffer as a string, we make sure the string terminates */
1293 if (read_size >= PAGE_SIZE)
1294 read_size = PAGE_SIZE - 1;
1295 read_buf[read_size] = 0;
1296
1297 prog = 0;
1298 p = read_buf;
1299 while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
1300 if (strcmp(prop_name, "type") == 0)
1301 props->IoLinkType = (uint32_t)prop_val;
1302 else if (strcmp(prop_name, "version_major") == 0)
1303 props->VersionMajor = (uint32_t)prop_val;
1304 else if (strcmp(prop_name, "version_minor") == 0)
1305 props->VersionMinor = (uint32_t)prop_val;
1306 else if (strcmp(prop_name, "node_from") == 0)
1307 props->NodeFrom = (uint32_t)prop_val;
1308 else if (strcmp(prop_name, "node_to") == 0)
1309 props->NodeTo = (uint32_t)prop_val;
1310 else if (strcmp(prop_name, "weight") == 0)
1311 props->Weight = (uint32_t)prop_val;
1312 else if (strcmp(prop_name, "min_latency") == 0)
1313 props->MinimumLatency = (uint32_t)prop_val;
1314 else if (strcmp(prop_name, "max_latency") == 0)
1315 props->MaximumLatency = (uint32_t)prop_val;
1316 else if (strcmp(prop_name, "min_bandwidth") == 0)
1317 props->MinimumBandwidth = (uint32_t)prop_val;
1318 else if (strcmp(prop_name, "max_bandwidth") == 0)
1319 props->MaximumBandwidth = (uint32_t)prop_val;
1320 else if (strcmp(prop_name, "recommended_transfer_size") == 0)
1321 props->RecTransferSize = (uint32_t)prop_val;
1322 else if (strcmp(prop_name, "flags") == 0)
1323 props->Flags.LinkProperty = (uint32_t)prop_val;
1324 }
1325
1326
1327 err2:
1328 free(read_buf);
1329 err1:
1330 fclose(fd);
1331 return ret;
1332 }
1333
1334 /* topology_get_free_io_link_slot_for_node - For the given node_id, find the
1335 * next available free slot to add an io_link
1336 */
1337 static HsaIoLinkProperties *topology_get_free_io_link_slot_for_node(uint32_t node_id,
1338 const HsaSystemProperties *sys_props,
1339 node_t *nodes)
1340 {
1341 HsaIoLinkProperties *props;
1342
1343 if (node_id >= sys_props->NumNodes) {
1344 pr_err("Invalid node [%d]\n", node_id);
1345 return NULL;
1346 }
1347
1348 props = nodes[node_id].link;
1349 if (!props) {
1350 pr_err("No io_link reported for Node [%d]\n", node_id);
1351 return NULL;
1352 }
1353
1354 if (nodes[node_id].node.NumIOLinks >= sys_props->NumNodes - 1) {
1355 pr_err("No more space for io_link for Node [%d]\n", node_id);
1356 return NULL;
1357 }
1358
1359 return &props[nodes[node_id].node.NumIOLinks];
1360 }
1361
1362 /* topology_add_io_link_for_node - If a free slot is available,
1363 * add io_link for the given Node. If bi_directional is true, set up two
1364 * links for both directions.
1365 * TODO: Add other members of HsaIoLinkProperties
1366 */
1367 static HSAKMT_STATUS topology_add_io_link_for_node(uint32_t node_id,
1368 const HsaSystemProperties *sys_props,
1369 node_t *nodes,
1370 HSA_IOLINKTYPE IoLinkType,
1371 uint32_t NodeTo,
1372 uint32_t Weight, bool bi_dir)
1373 {
1374 HsaIoLinkProperties *props;
1375 /* If bi-directional is set true, it's two links to add. */
1376 uint32_t i, num_links = (bi_dir == true) ? 2 : 1;
1377 uint32_t node_from = node_id, node_to = NodeTo;
1378
1379 for (i = 0; i < num_links; i++) {
1380 props = topology_get_free_io_link_slot_for_node(node_from,
1381 sys_props, nodes);
1382 if (!props)
1383 return HSAKMT_STATUS_NO_MEMORY;
1384
1385 props->IoLinkType = IoLinkType;
1386 props->NodeFrom = node_from;
1387 props->NodeTo = node_to;
1388 props->Weight = Weight;
1389 nodes[node_from].node.NumIOLinks++;
1390 /* switch direction on the 2nd link when num_links=2 */
1391 node_from = NodeTo;
1392 node_to = node_id;
1393 }
1394
1395 return HSAKMT_STATUS_SUCCESS;
1396 }
1397
1398 /* Find the CPU that this GPU (gpu_node) directly connects to */
1399 static int32_t gpu_get_direct_link_cpu(uint32_t gpu_node, node_t *nodes)
1400 {
1401 HsaIoLinkProperties *props = nodes[gpu_node].link;
1402 uint32_t i;
1403
1404 if (!nodes[gpu_node].gpu_id || !props ||
1405 nodes[gpu_node].node.NumIOLinks == 0)
1406 return -1;
1407
1408 for (i = 0; i < nodes[gpu_node].node.NumIOLinks; i++)
1409 if (props[i].IoLinkType == HSA_IOLINKTYPE_PCIEXPRESS &&
1410 props[i].Weight <= 20) /* >20 is GPU->CPU->GPU */
1411 return props[i].NodeTo;
1412
1413 return -1;
1414 }
1415
1416 /* Get node1->node2 IO link information. This should be a direct link that has
1417 * been created in the kernel.
1418 */
1419 static HSAKMT_STATUS get_direct_iolink_info(uint32_t node1, uint32_t node2,
1420 node_t *nodes, HSAuint32 *weight,
1421 HSA_IOLINKTYPE *type)
1422 {
1423 HsaIoLinkProperties *props = nodes[node1].link;
1424 uint32_t i;
1425
1426 if (!props)
1427 return HSAKMT_STATUS_INVALID_NODE_UNIT;
1428
1429 for (i = 0; i < nodes[node1].node.NumIOLinks; i++)
1430 if (props[i].NodeTo == node2) {
1431 if (weight)
1432 *weight = props[i].Weight;
1433 if (type)
1434 *type = props[i].IoLinkType;
1435 return HSAKMT_STATUS_SUCCESS;
1436 }
1437
1438 return HSAKMT_STATUS_INVALID_PARAMETER;
1439 }
1440
/* get_indirect_iolink_info - Compute the weight and middle-hop type of the
 * multi-hop route between @node1 and @node2 through their directly attached
 * CPU(s). At least one endpoint must be a GPU.
 * @weight - [OUT] sum of direct-link weights along the route (0 if none)
 * @type - [OUT] link type of the middle hop
 * Return - HSAKMT_STATUS_SUCCESS, or an error/not-supported status
 */
static HSAKMT_STATUS get_indirect_iolink_info(uint32_t node1, uint32_t node2,
					      node_t *nodes, HSAuint32 *weight,
					      HSA_IOLINKTYPE *type)
{
	int32_t dir_cpu1 = -1, dir_cpu2 = -1;
	HSAuint32 weight1 = 0, weight2 = 0, weight3 = 0;
	HSAKMT_STATUS ret;

	*weight = 0;
	*type = HSA_IOLINKTYPE_UNDEFINED;

	if (node1 == node2)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* CPU->CPU is not an indirect link */
	if (!nodes[node1].gpu_id && !nodes[node2].gpu_id)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* For each GPU endpoint, find its directly attached CPU */
	if (nodes[node1].gpu_id)
		dir_cpu1 = gpu_get_direct_link_cpu(node1, nodes);
	if (nodes[node2].gpu_id)
		dir_cpu2 = gpu_get_direct_link_cpu(node2, nodes);

	if (dir_cpu1 < 0 && dir_cpu2 < 0)
		return HSAKMT_STATUS_ERROR;

	/* Possible topology:
	 * GPU --(weight1) -- CPU -- (weight2) -- GPU
	 * GPU --(weight1) -- CPU -- (weight2) -- CPU -- (weight3) -- GPU
	 * GPU --(weight1) -- CPU -- (weight2) -- CPU
	 * CPU -- (weight2) -- CPU -- (weight3) -- GPU
	 */
	if (dir_cpu1 >= 0) { /* GPU->CPU ... */
		if (dir_cpu2 >= 0) {
			if (dir_cpu1 == dir_cpu2) /* GPU->CPU->GPU*/ {
				ret = get_direct_iolink_info(node1, dir_cpu1,
						nodes, &weight1, NULL);
				if (ret != HSAKMT_STATUS_SUCCESS)
					return ret;
				ret = get_direct_iolink_info(dir_cpu1, node2,
						nodes, &weight2, type);
			} else /* GPU->CPU->CPU->GPU*/ {
				ret = get_direct_iolink_info(node1, dir_cpu1,
						nodes, &weight1, NULL);
				if (ret != HSAKMT_STATUS_SUCCESS)
					return ret;
				ret = get_direct_iolink_info(dir_cpu1, dir_cpu2,
						nodes, &weight2, type);
				if (ret != HSAKMT_STATUS_SUCCESS)
					return ret;
				/* On QPI interconnection, GPUs can't access
				 * each other if they are attached to different
				 * CPU sockets. CPU<->CPU weight larger than 20
				 * means the two CPUs are in different sockets.
				 */
				if (*type == HSA_IOLINK_TYPE_QPI_1_1
					&& weight2 > 20)
					return HSAKMT_STATUS_NOT_SUPPORTED;
				ret = get_direct_iolink_info(dir_cpu2, node2,
						nodes, &weight3, NULL);
			}
		} else /* GPU->CPU->CPU */ {
			ret = get_direct_iolink_info(node1, dir_cpu1, nodes,
							&weight1, NULL);
			if (ret != HSAKMT_STATUS_SUCCESS)
				return ret;
			ret = get_direct_iolink_info(dir_cpu1, node2, nodes,
							&weight2, type);
		}
	} else { /* CPU->CPU->GPU */
		ret = get_direct_iolink_info(node1, dir_cpu2, nodes, &weight2,
						type);
		if (ret != HSAKMT_STATUS_SUCCESS)
			return ret;
		ret = get_direct_iolink_info(dir_cpu2, node2, nodes, &weight3,
						NULL);
	}

	/* ret here is the status of the last hop queried above */
	if (ret != HSAKMT_STATUS_SUCCESS)
		return ret;

	/* Total route cost is the sum of all traversed direct links */
	*weight = weight1 + weight2 + weight3;
	return HSAKMT_STATUS_SUCCESS;
}
1525
1526 static void topology_create_indirect_gpu_links(const HsaSystemProperties *sys_props,
1527 node_t *nodes)
1528 {
1529
1530 uint32_t i, j;
1531 HSAuint32 weight;
1532 HSA_IOLINKTYPE type;
1533
1534 for (i = 0; i < sys_props->NumNodes - 1; i++) {
1535 for (j = i + 1; j < sys_props->NumNodes; j++) {
1536 get_indirect_iolink_info(i, j, nodes, &weight, &type);
1537 if (!weight)
1538 continue;
1539 if (topology_add_io_link_for_node(i, sys_props, nodes,
1540 type, j, weight, true) != HSAKMT_STATUS_SUCCESS)
1541 pr_err("Fail to add IO link %d->%d\n", i, j);
1542 }
1543 }
1544 }
1545
/* topology_take_snapshot - Rebuild the global topology state (_system, node)
 * from sysfs. Reads the generation counter before and after; if it changed
 * mid-read, the whole snapshot is retried. Assumes the caller holds the
 * topology lock.
 * Return - HSAKMT_STATUS_SUCCESS or an error status
 */
HSAKMT_STATUS topology_take_snapshot(void)
{
	uint32_t gen_start, gen_end, i, mem_id, cache_id, link_id;
	HsaSystemProperties sys_props;
	node_t *temp_nodes = 0;
	void *cpu_ci_list = NULL;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	struct pci_access *pacc;
	char *envvar;

	topology_set_processor_vendor();
	/* Valgrind changes cpuid behavior; see topology_get_cpu_cache_props */
	envvar = getenv("HSA_RUNNING_UNDER_VALGRIND");
	if (envvar && !strcmp(envvar, "1"))
		is_valgrind = 1;
	else
		is_valgrind = 0;

retry:
	ret = topology_sysfs_get_generation(&gen_start);
	if (ret != HSAKMT_STATUS_SUCCESS)
		return ret;
	ret = topology_sysfs_get_system_props(&sys_props);
	if (ret != HSAKMT_STATUS_SUCCESS)
		return ret;
	if (sys_props.NumNodes > 0) {
		/* NOTE(review): on the retry path this re-creates the CPU
		 * cache list without destroying the previous one — the old
		 * list appears to leak; confirm and fix upstream.
		 */
		topology_create_temp_cpu_cache_list(&cpu_ci_list);
		temp_nodes = calloc(sys_props.NumNodes * sizeof(node_t), 1);
		if (!temp_nodes)
			return HSAKMT_STATUS_NO_MEMORY;
		/* pcilib is used to look up marketing names of dGPU nodes */
		pacc = pci_alloc();
		pci_init(pacc);
		for (i = 0; i < sys_props.NumNodes; i++) {
			ret = topology_sysfs_get_node_props(i,
					&temp_nodes[i].node,
					&temp_nodes[i].gpu_id, pacc);
			if (ret != HSAKMT_STATUS_SUCCESS) {
				free_nodes(temp_nodes, i);
				goto err;
			}
			if (temp_nodes[i].node.NumMemoryBanks) {
				temp_nodes[i].mem = calloc(temp_nodes[i].node.NumMemoryBanks * sizeof(HsaMemoryProperties), 1);
				if (!temp_nodes[i].mem) {
					ret = HSAKMT_STATUS_NO_MEMORY;
					free_nodes(temp_nodes, i + 1);
					goto err;
				}
				for (mem_id = 0; mem_id < temp_nodes[i].node.NumMemoryBanks; mem_id++) {
					ret = topology_sysfs_get_mem_props(i, mem_id, &temp_nodes[i].mem[mem_id]);
					if (ret != HSAKMT_STATUS_SUCCESS) {
						free_nodes(temp_nodes, i + 1);
						goto err;
					}
				}
			}

			/* Caches come from sysfs when reported; for CPU nodes
			 * without sysfs caches, from the cpuid-derived list.
			 */
			if (temp_nodes[i].node.NumCaches) {
				temp_nodes[i].cache = calloc(temp_nodes[i].node.NumCaches * sizeof(HsaCacheProperties), 1);
				if (!temp_nodes[i].cache) {
					ret = HSAKMT_STATUS_NO_MEMORY;
					free_nodes(temp_nodes, i + 1);
					goto err;
				}
				for (cache_id = 0; cache_id < temp_nodes[i].node.NumCaches; cache_id++) {
					ret = topology_sysfs_get_cache_props(i, cache_id, &temp_nodes[i].cache[cache_id]);
					if (ret != HSAKMT_STATUS_SUCCESS) {
						free_nodes(temp_nodes, i + 1);
						goto err;
					}
				}
			} else if (!temp_nodes[i].gpu_id) { /* a CPU node */
				ret = topology_get_cpu_cache_props(
						&temp_nodes[i], cpu_ci_list);
				if (ret != HSAKMT_STATUS_SUCCESS) {
					free_nodes(temp_nodes, i + 1);
					goto err;
				}
			}

			/* To simplify, allocate maximum needed memory for io_links for each node. This
			 * removes the need for realloc when indirect and QPI links are added later
			 */
			temp_nodes[i].link = calloc(sys_props.NumNodes - 1, sizeof(HsaIoLinkProperties));
			if (!temp_nodes[i].link) {
				ret = HSAKMT_STATUS_NO_MEMORY;
				free_nodes(temp_nodes, i + 1);
				goto err;
			}

			if (temp_nodes[i].node.NumIOLinks) {
				for (link_id = 0; link_id < temp_nodes[i].node.NumIOLinks; link_id++) {
					ret = topology_sysfs_get_iolink_props(i, link_id, &temp_nodes[i].link[link_id]);
					if (ret != HSAKMT_STATUS_SUCCESS) {
						free_nodes(temp_nodes, i+1);
						goto err;
					}
				}
			}
		}
		pci_cleanup(pacc);
	}

	/* All direct IO links are created in the kernel. Here we need to
	 * connect GPU<->GPU or GPU<->CPU indirect IO links.
	 */
	topology_create_indirect_gpu_links(&sys_props, temp_nodes);

	ret = topology_sysfs_get_generation(&gen_end);
	if (ret != HSAKMT_STATUS_SUCCESS) {
		free_nodes(temp_nodes, sys_props.NumNodes);
		goto err;
	}

	/* The topology changed while we were reading it: start over */
	if (gen_start != gen_end) {
		free_nodes(temp_nodes, sys_props.NumNodes);
		temp_nodes = 0;
		goto retry;
	}

	if (!_system) {
		_system = malloc(sizeof(HsaSystemProperties));
		if (!_system) {
			free_nodes(temp_nodes, sys_props.NumNodes);
			return HSAKMT_STATUS_NO_MEMORY;
		}
	}

	/* Publish the consistent snapshot into the globals */
	*_system = sys_props;
	if (node)
		free(node);
	node = temp_nodes;
err:
	topology_destroy_temp_cpu_cache_list(cpu_ci_list);
	return ret;
}
1680
1681 /* Drop the Snashot of the HSA topology information. Assume lock is held. */
1682 HSAKMT_STATUS topology_drop_snapshot(void)
1683 {
1684 HSAKMT_STATUS err;
1685
1686 if (!!_system != !!node) {
1687 pr_warn("Probably inconsistency?\n");
1688 err = HSAKMT_STATUS_SUCCESS;
1689 goto out;
1690 }
1691
1692 if (node) {
1693 /* Remove state */
1694 free_nodes(node, _system->NumNodes);
1695 node = NULL;
1696 }
1697
1698 free(_system);
1699 _system = NULL;
1700 err = HSAKMT_STATUS_SUCCESS;
1701
1702 out:
1703 return err;
1704 }
1705
1706 HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id)
1707 {
1708 if (!node || !_system || _system->NumNodes <= nodeid)
1709 return HSAKMT_STATUS_INVALID_NODE_UNIT;
1710 if (gpu_id)
1711 *gpu_id = node[nodeid].gpu_id;
1712
1713 return HSAKMT_STATUS_SUCCESS;
1714 }
1715
1716 HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id)
1717 {
1718 uint64_t node_idx;
1719
1720 for (node_idx = 0; node_idx < _system->NumNodes; node_idx++) {
1721 if (node[node_idx].gpu_id == gpu_id) {
1722 *node_id = node_idx;
1723 return HSAKMT_STATUS_SUCCESS;
1724 }
1725 }
1726
1727 return HSAKMT_STATUS_INVALID_NODE_UNIT;
1728
1729 }
1730
1731 HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties)
1732 {
1733 HSAKMT_STATUS err;
1734
1735 CHECK_KFD_OPEN();
1736
1737 if (!SystemProperties)
1738 return HSAKMT_STATUS_INVALID_PARAMETER;
1739
1740 pthread_mutex_lock(&hsakmt_mutex);
1741
1742 err = topology_take_snapshot();
1743 if (err != HSAKMT_STATUS_SUCCESS)
1744 goto out;
1745
1746 assert(_system);
1747
1748 *SystemProperties = *_system;
1749 err = HSAKMT_STATUS_SUCCESS;
1750
1751 out:
1752 pthread_mutex_unlock(&hsakmt_mutex);
1753 return err;
1754 }
1755
1756 HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void)
1757 {
1758 CHECK_KFD_OPEN();
1759
1760 HSAKMT_STATUS err;
1761
1762 pthread_mutex_lock(&hsakmt_mutex);
1763
1764 err = topology_drop_snapshot();
1765
1766 pthread_mutex_unlock(&hsakmt_mutex);
1767
1768 return err;
1769 }
1770
1771 HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId,
1772 HsaNodeProperties *NodeProperties)
1773 {
1774 HSAKMT_STATUS err;
1775 uint32_t gpu_id;
1776
1777 if (!NodeProperties)
1778 return HSAKMT_STATUS_INVALID_PARAMETER;
1779
1780 CHECK_KFD_OPEN();
1781 pthread_mutex_lock(&hsakmt_mutex);
1782
1783 /* KFD ADD page 18, snapshot protocol violation */
1784 if (!_system) {
1785 err = HSAKMT_STATUS_INVALID_NODE_UNIT;
1786 assert(_system);
1787 goto out;
1788 }
1789
1790 if (NodeId >= _system->NumNodes) {
1791 err = HSAKMT_STATUS_INVALID_PARAMETER;
1792 goto out;
1793 }
1794
1795 err = validate_nodeid(NodeId, &gpu_id);
1796 if (err != HSAKMT_STATUS_SUCCESS)
1797 return err;
1798
1799 *NodeProperties = node[NodeId].node;
1800 /* For CPU only node don't add any additional GPU memory banks. */
1801 if (gpu_id) {
1802 if (topology_is_dgpu(get_device_id_by_gpu_id(gpu_id)))
1803 NodeProperties->NumMemoryBanks += NUM_OF_DGPU_HEAPS;
1804 else
1805 NodeProperties->NumMemoryBanks += NUM_OF_IGPU_HEAPS;
1806 }
1807 err = HSAKMT_STATUS_SUCCESS;
1808
1809 out:
1810 pthread_mutex_unlock(&hsakmt_mutex);
1811 return err;
1812 }
1813
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
						      HSAuint32 NumBanks,
						      HsaMemoryProperties *MemoryProperties)
{
	HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS;
	uint32_t i, gpu_id;
	HSAuint64 aperture_limit;
	bool nodeIsDGPU;

	if (!MemoryProperties)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	CHECK_KFD_OPEN();
	pthread_mutex_lock(&hsakmt_mutex);

	/* KFD ADD page 18, snapshot protocol violation */
	if (!_system) {
		err = HSAKMT_STATUS_INVALID_NODE_UNIT;
		assert(_system);
		goto out;
	}

	/* Check still necessary */
	if (NodeId >= _system->NumNodes) {
		err = HSAKMT_STATUS_INVALID_PARAMETER;
		goto out;
	}

	err = validate_nodeid(NodeId, &gpu_id);
	if (err != HSAKMT_STATUS_SUCCESS)
		goto out;

	memset(MemoryProperties, 0, NumBanks * sizeof(HsaMemoryProperties));

	/* First copy the sysfs-reported banks; i then continues past them so
	 * the thunk-added heaps below are appended, each capped by NumBanks.
	 */
	for (i = 0; i < MIN(node[NodeId].node.NumMemoryBanks, NumBanks); i++) {
		assert(node[NodeId].mem);
		MemoryProperties[i] = node[NodeId].mem[i];
	}

	/* The following memory banks does not apply to CPU only node */
	if (gpu_id == 0)
		goto out;

	nodeIsDGPU = topology_is_dgpu(get_device_id_by_gpu_id(gpu_id));

	/*Add LDS*/
	if (i < NumBanks &&
		fmm_get_aperture_base_and_limit(FMM_LDS, gpu_id,
				&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
		MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_LDS;
		MemoryProperties[i].SizeInBytes = node[NodeId].node.LDSSizeInKB * 1024;
		i++;
	}

	/* Add Local memory - HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE.
	 * For dGPU the topology node contains Local Memory and it is added by
	 * the for loop above
	 */
	if (!nodeIsDGPU && i < NumBanks && node[NodeId].node.LocalMemSize > 0 &&
		fmm_get_aperture_base_and_limit(FMM_GPUVM, gpu_id,
				&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
		MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
		MemoryProperties[i].SizeInBytes = node[NodeId].node.LocalMemSize;
		i++;
	}

	/* Add SCRATCH */
	if (i < NumBanks &&
		fmm_get_aperture_base_and_limit(FMM_SCRATCH, gpu_id,
				&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
		MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_SCRATCH;
		/* Size is derived from the aperture span, not from topology */
		MemoryProperties[i].SizeInBytes = (aperture_limit - MemoryProperties[i].VirtualBaseAddress) + 1;
		i++;
	}

	/* On dGPUs add SVM aperture */
	if (nodeIsDGPU && i < NumBanks &&
	    fmm_get_aperture_base_and_limit(
		    FMM_SVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress,
		    &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
		MemoryProperties[i].HeapType = HSA_HEAPTYPE_DEVICE_SVM;
		MemoryProperties[i].SizeInBytes = (aperture_limit - MemoryProperties[i].VirtualBaseAddress) + 1;
		i++;
	}

out:
	pthread_mutex_unlock(&hsakmt_mutex);
	return err;
}
1903
1904 HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties(HSAuint32 NodeId,
1905 HSAuint32 ProcessorId,
1906 HSAuint32 NumCaches,
1907 HsaCacheProperties *CacheProperties)
1908 {
1909 HSAKMT_STATUS err;
1910 uint32_t i;
1911
1912 if (!CacheProperties)
1913 return HSAKMT_STATUS_INVALID_PARAMETER;
1914
1915 CHECK_KFD_OPEN();
1916 pthread_mutex_lock(&hsakmt_mutex);
1917
1918 /* KFD ADD page 18, snapshot protocol violation */
1919 if (!_system) {
1920 err = HSAKMT_STATUS_INVALID_NODE_UNIT;
1921 assert(_system);
1922 goto out;
1923 }
1924
1925 if (NodeId >= _system->NumNodes || NumCaches > node[NodeId].node.NumCaches) {
1926 err = HSAKMT_STATUS_INVALID_PARAMETER;
1927 goto out;
1928 }
1929
1930 for (i = 0; i < MIN(node[NodeId].node.NumCaches, NumCaches); i++) {
1931 assert(node[NodeId].cache);
1932 CacheProperties[i] = node[NodeId].cache[i];
1933 }
1934
1935 err = HSAKMT_STATUS_SUCCESS;
1936
1937 out:
1938 pthread_mutex_unlock(&hsakmt_mutex);
1939 return err;
1940 }
1941
1942 HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId,
1943 HSAuint32 NumIoLinks,
1944 HsaIoLinkProperties *IoLinkProperties)
1945 {
1946 HSAKMT_STATUS err;
1947 uint32_t i;
1948
1949 if (!IoLinkProperties)
1950 return HSAKMT_STATUS_INVALID_PARAMETER;
1951
1952 CHECK_KFD_OPEN();
1953
1954 pthread_mutex_lock(&hsakmt_mutex);
1955
1956 /* KFD ADD page 18, snapshot protocol violation */
1957 if (!_system) {
1958 err = HSAKMT_STATUS_INVALID_NODE_UNIT;
1959 assert(_system);
1960 goto out;
1961 }
1962
1963 if (NodeId >= _system->NumNodes || NumIoLinks > node[NodeId].node.NumIOLinks) {
1964 err = HSAKMT_STATUS_INVALID_PARAMETER;
1965 goto out;
1966 }
1967
1968 for (i = 0; i < MIN(node[NodeId].node.NumIOLinks, NumIoLinks); i++) {
1969 assert(node[NodeId].link);
1970 IoLinkProperties[i] = node[NodeId].link[i];
1971 }
1972
1973 err = HSAKMT_STATUS_SUCCESS;
1974
1975 out:
1976 pthread_mutex_unlock(&hsakmt_mutex);
1977 return err;
1978 }
1979
1980 uint16_t get_device_id_by_node(HSAuint32 node_id)
1981 {
1982 if (!node || !_system || _system->NumNodes <= node_id)
1983 return 0;
1984
1985 return node[node_id].node.DeviceId;
1986 }
1987
1988 uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id)
1989 {
1990 unsigned int i;
1991
1992 if (!node || !_system)
1993 return 0;
1994
1995 for (i = 0; i < _system->NumNodes; i++) {
1996 if (node[i].gpu_id == gpu_id)
1997 return node[i].node.DeviceId;
1998 }
1999
2000 return 0;
2001 }
2002
2003 HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array,
2004 uint32_t NumberOfNodes, uint32_t *NodeArray)
2005 {
2006 HSAKMT_STATUS ret;
2007 unsigned int i;
2008
2009 if (NumberOfNodes == 0 || !NodeArray || !gpu_id_array)
2010 return HSAKMT_STATUS_INVALID_PARAMETER;
2011
2012 /* Translate Node IDs to gpu_ids */
2013 *gpu_id_array = malloc(NumberOfNodes * sizeof(uint32_t));
2014 if (!(*gpu_id_array))
2015 return HSAKMT_STATUS_NO_MEMORY;
2016 for (i = 0; i < NumberOfNodes; i++) {
2017 ret = validate_nodeid(NodeArray[i], *gpu_id_array + i);
2018 if (ret != HSAKMT_STATUS_SUCCESS) {
2019 free(*gpu_id_array);
2020 break;
2021 }
2022 }
2023
2024 return ret;
2025 }
2026