/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>	/* CHAR_BIT */
#ifdef __linux__
#include <linux/perf_event.h>
#endif
#include <sys/syscall.h>
#include "libhsakmt.h"
#include "pmc_table.h"
#include "linux/kfd_ioctl.h"
#include <unistd.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <semaphore.h>

#define BITS_PER_BYTE		CHAR_BIT

#define HSA_PERF_MAGIC4CC	0x54415348

enum perf_trace_state {
	PERF_TRACE_STATE__STOPPED = 0,
	PERF_TRACE_STATE__STARTED
};

struct perf_trace_block {
	enum perf_block_id block_id;
	uint32_t num_counters;
	uint64_t *counter_id;
	int *perf_event_fd;
};

struct perf_trace {
	uint32_t magic4cc;
	uint32_t gpu_id;
	enum perf_trace_state state;
	uint32_t num_blocks;
	void *buf;
	uint64_t buf_size;
	struct perf_trace_block blocks[0];
};

enum perf_trace_action {
	PERF_TRACE_ACTION__ACQUIRE = 0,
	PERF_TRACE_ACTION__RELEASE
};

struct perf_shared_table {
	uint32_t magic4cc;
	uint32_t iommu_slots_left;
};

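/* Matches the layout the kernel returns for a read() of a perf event
 * FD opened with PERF_FORMAT_TOTAL_TIME_ENABLED |
 * PERF_FORMAT_TOTAL_TIME_RUNNING (see open_perf_event_fd below): the
 * counter value followed by the enabled and running times.
 */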
struct perf_counts_values {
	union {
		struct {
			u64 val;
			u64 ena;
			u64 run;
		};
		u64 values[3];
	};
};

static HsaCounterProperties **counter_props;
static unsigned int counter_props_count;
static const char shmem_name[] = "/hsakmt_shared_mem";
static int shmem_fd;
static const char sem_name[] = "hsakmt_semaphore";
static sem_t *sem = SEM_FAILED;
struct perf_shared_table *shared_table;

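/* readn - Read exactly n bytes from fd into buf, retrying when the
 * read is interrupted. Returns n on success, the number of bytes read
 * before EOF, or -errno on a read error.
 */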
static ssize_t readn(int fd, void *buf, size_t n)
{
	size_t left = n;
	ssize_t bytes;

	while (left) {
		bytes = read(fd, buf, left);
		if (!bytes) /* reached EOF */
			return (n - left);
		if (bytes < 0) {
			if (errno == EINTR) /* read was interrupted */
				continue;
			else
				return -errno;
		}
		left -= bytes;
		buf = VOID_PTR_ADD(buf, bytes);
	}
	return n;
}

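/* init_shared_region - Set up the POSIX shared memory object and named
 * semaphore that coordinate privileged counter slots across processes.
 */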
static HSAKMT_STATUS init_shared_region(void)
{
	sem = sem_open(sem_name, O_CREAT, 0666, 1);
	if (sem == SEM_FAILED)
		return HSAKMT_STATUS_ERROR;

	shmem_fd = shm_open(shmem_name, O_CREAT | O_RDWR, 0666);
	if (shmem_fd < 0)
		goto exit_1;

	if (ftruncate(shmem_fd, sizeof(struct perf_shared_table)) < 0)
		goto exit_2;

	shared_table = mmap(NULL, sizeof(*shared_table),
			PROT_READ | PROT_WRITE, MAP_SHARED, shmem_fd, 0);
	if (shared_table == MAP_FAILED)
		goto exit_2;

	return HSAKMT_STATUS_SUCCESS;

exit_2:
	close(shmem_fd);
	shm_unlink(shmem_name);
	shmem_fd = 0;
exit_1:
	sem_close(sem);
	sem_unlink(sem_name);
	sem = SEM_FAILED;
	return HSAKMT_STATUS_ERROR;
}

static void destroy_shared_region(void)
{
	if (shared_table && shared_table != MAP_FAILED)
		munmap(shared_table, sizeof(*shared_table));

	if (shmem_fd > 0) {
		close(shmem_fd);
		shm_unlink(shmem_name);
	}

	if (sem != SEM_FAILED) {
		sem_close(sem);
		sem_unlink(sem_name);
		sem = SEM_FAILED;
	}
}

static void init_perf_shared_table(void)
{
	sem_wait(sem);

	/* If the magic number exists, the perf shared table has been
	 * initialized by another process and is in use. Don't overwrite it.
	 */
	if (shared_table->magic4cc == HSA_PERF_MAGIC4CC) {
		sem_post(sem);
		return;
	}

	/* write the perf content */
	shared_table->magic4cc = HSA_PERF_MAGIC4CC;
	shared_table->iommu_slots_left =
		pmc_table_get_max_concurrent(PERFCOUNTER_BLOCKID__IOMMUV2);

	sem_post(sem);
}

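/* init_counter_props - Allocate the per-node cache of counter
 * properties and initialize the shared region used for profiling
 * privileged blocks.
 */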
HSAKMT_STATUS init_counter_props(unsigned int NumNodes)
{
	counter_props = calloc(NumNodes, sizeof(*counter_props));
	if (!counter_props) {
		pr_warn("Profiling is not available.\n");
		return HSAKMT_STATUS_NO_MEMORY;
	}

	counter_props_count = NumNodes;
	alloc_pmc_blocks();

	if (init_shared_region() != HSAKMT_STATUS_SUCCESS) {
		pr_warn("Profiling of privileged blocks is not available.\n");
		return HSAKMT_STATUS_ERROR;
	}
	init_perf_shared_table();

	return HSAKMT_STATUS_SUCCESS;
}

void destroy_counter_props(void)
{
	unsigned int i;

	destroy_shared_region();

	if (!counter_props)
		return;

	for (i = 0; i < counter_props_count; i++)
		if (counter_props[i]) {
			free(counter_props[i]);
			counter_props[i] = NULL;
		}

	free(counter_props);
	free_pmc_blocks();
}

static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
{
	int rc = 0;

	switch (block_id) {
	case PERFCOUNTER_BLOCKID__CB:
		*uuid = HSA_PROFILEBLOCK_AMD_CB;
		break;
	case PERFCOUNTER_BLOCKID__CPF:
		*uuid = HSA_PROFILEBLOCK_AMD_CPF;
		break;
	case PERFCOUNTER_BLOCKID__CPG:
		*uuid = HSA_PROFILEBLOCK_AMD_CPG;
		break;
	case PERFCOUNTER_BLOCKID__DB:
		*uuid = HSA_PROFILEBLOCK_AMD_DB;
		break;
	case PERFCOUNTER_BLOCKID__GDS:
		*uuid = HSA_PROFILEBLOCK_AMD_GDS;
		break;
	case PERFCOUNTER_BLOCKID__GRBM:
		*uuid = HSA_PROFILEBLOCK_AMD_GRBM;
		break;
	case PERFCOUNTER_BLOCKID__GRBMSE:
		*uuid = HSA_PROFILEBLOCK_AMD_GRBMSE;
		break;
	case PERFCOUNTER_BLOCKID__IA:
		*uuid = HSA_PROFILEBLOCK_AMD_IA;
		break;
	case PERFCOUNTER_BLOCKID__MC:
		*uuid = HSA_PROFILEBLOCK_AMD_MC;
		break;
	case PERFCOUNTER_BLOCKID__PASC:
		*uuid = HSA_PROFILEBLOCK_AMD_PASC;
		break;
	case PERFCOUNTER_BLOCKID__PASU:
		*uuid = HSA_PROFILEBLOCK_AMD_PASU;
		break;
	case PERFCOUNTER_BLOCKID__SPI:
		*uuid = HSA_PROFILEBLOCK_AMD_SPI;
		break;
	case PERFCOUNTER_BLOCKID__SRBM:
		*uuid = HSA_PROFILEBLOCK_AMD_SRBM;
		break;
	case PERFCOUNTER_BLOCKID__SQ:
		*uuid = HSA_PROFILEBLOCK_AMD_SQ;
		break;
	case PERFCOUNTER_BLOCKID__SX:
		*uuid = HSA_PROFILEBLOCK_AMD_SX;
		break;
	case PERFCOUNTER_BLOCKID__TA:
		*uuid = HSA_PROFILEBLOCK_AMD_TA;
		break;
	case PERFCOUNTER_BLOCKID__TCA:
		*uuid = HSA_PROFILEBLOCK_AMD_TCA;
		break;
	case PERFCOUNTER_BLOCKID__TCC:
		*uuid = HSA_PROFILEBLOCK_AMD_TCC;
		break;
	case PERFCOUNTER_BLOCKID__TCP:
		*uuid = HSA_PROFILEBLOCK_AMD_TCP;
		break;
	case PERFCOUNTER_BLOCKID__TCS:
		*uuid = HSA_PROFILEBLOCK_AMD_TCS;
		break;
	case PERFCOUNTER_BLOCKID__TD:
		*uuid = HSA_PROFILEBLOCK_AMD_TD;
		break;
	case PERFCOUNTER_BLOCKID__VGT:
		*uuid = HSA_PROFILEBLOCK_AMD_VGT;
		break;
	case PERFCOUNTER_BLOCKID__WD:
		*uuid = HSA_PROFILEBLOCK_AMD_WD;
		break;
	case PERFCOUNTER_BLOCKID__IOMMUV2:
		*uuid = HSA_PROFILEBLOCK_AMD_IOMMUV2;
		break;
	default:
		/* If we reach this point, it's a bug */
		rc = -1;
		break;
	}

	return rc;
}

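/* get_block_concurrent_limit - Return NumConcurrent for block_id on a
 * node. Block properties are variable-length, so the walk steps over
 * each block's trailing Counters[] array to reach the next block.
 */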
static HSAuint32 get_block_concurrent_limit(uint32_t node_id,
						HSAuint32 block_id)
{
	uint32_t i;
	HsaCounterBlockProperties *block = &counter_props[node_id]->Blocks[0];

	for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
		if (block->Counters[0].BlockIndex == block_id)
			return block->NumConcurrent;
		block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters];
	}

	return 0;
}

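/* update_block_slots - Acquire or release num_slots of a privileged
 * block's concurrent-counter budget in the cross-process shared table.
 * Only the IOMMUv2 block is tracked there today.
 */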
static HSAKMT_STATUS update_block_slots(enum perf_trace_action action,
					uint32_t block_id, uint32_t num_slots)
{
	uint32_t *slots_left;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;

	if (shmem_fd <= 0)
		return HSAKMT_STATUS_UNAVAILABLE;
	if (sem == SEM_FAILED)
		return HSAKMT_STATUS_UNAVAILABLE;

	sem_wait(sem);

	if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2)
		slots_left = &shared_table->iommu_slots_left;
	else {
		ret = HSAKMT_STATUS_UNAVAILABLE;
		goto out;
	}

	switch (action) {
	case PERF_TRACE_ACTION__ACQUIRE:
		if (*slots_left >= num_slots)
			*slots_left -= num_slots;
		else
			ret = HSAKMT_STATUS_UNAVAILABLE;
		break;
	case PERF_TRACE_ACTION__RELEASE:
		if ((*slots_left + num_slots) <=
				pmc_table_get_max_concurrent(block_id))
			*slots_left += num_slots;
		else
			ret = HSAKMT_STATUS_ERROR;
		break;
	default:
		ret = HSAKMT_STATUS_INVALID_PARAMETER;
		break;
	}

out:
	sem_post(sem);

	return ret;
}

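/* get_perf_event_type - Read a block's dynamic perf event source type
 * from sysfs. Returns 0 if no event source exists for the block.
 */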
static unsigned int get_perf_event_type(enum perf_block_id block_id)
{
	FILE *file = NULL;
	unsigned int type = 0;

	if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2) {
		/* Starting from kernel 4.12, amd_iommu_0 is used */
		file = fopen("/sys/bus/event_source/devices/amd_iommu_0/type",
			 "r");
		if (!file)
			file = fopen(/* kernel 4.11 and older */
				"/sys/bus/event_source/devices/amd_iommu/type",
				"r");
	}

	if (!file)
		return 0;

	if (fscanf(file, "%u", &type) != 1)
		type = 0;
	fclose(file);

	return type;
}

/* close_perf_event_fd - Close all FDs opened for this block.
 * When the runtime acquires trace access, it has no knowledge of the
 * individual FDs opened for the block, so treat the block as one unit
 * and close all of them.
 */
static void close_perf_event_fd(struct perf_trace_block *block)
{
	uint32_t i;

	if (!block || !block->perf_event_fd)
		return;

	for (i = 0; i < block->num_counters; i++)
		if (block->perf_event_fd[i] > 0) {
			close(block->perf_event_fd[i]);
			block->perf_event_fd[i] = 0;
		}
}

/* open_perf_event_fd - Open the FDs required for this block.
 * If opening any of them fails, close every FD opened so far: the
 * runtime has no knowledge of which FDs did open successfully and
 * will never ask to close them.
 */
static HSAKMT_STATUS open_perf_event_fd(struct perf_trace_block *block)
{
#ifdef __linux__
	struct perf_event_attr attr;
	uint32_t i;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;

	if (!block || !block->perf_event_fd)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (getuid()) {
		pr_err("Must be root to open perf_event.\n");
		return HSAKMT_STATUS_ERROR;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = get_perf_event_type(block->block_id);
	if (!attr.type)
		return HSAKMT_STATUS_ERROR;

	for (i = 0; i < block->num_counters; i++) {
		attr.size = sizeof(struct perf_event_attr);
		attr.config = block->counter_id[i];
		attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
					PERF_FORMAT_TOTAL_TIME_RUNNING;
		attr.disabled = 1;
		attr.inherit = 1;

		/* Profile system wide, not per thread or per group:
		 * pid = -1 (all processes), cpu = 0, group_fd = -1 and
		 * flags = PERF_FLAG_FD_NO_GROUP (no event group).
		 */
		block->perf_event_fd[i] = syscall(__NR_perf_event_open, &attr,
					-1, 0, -1, PERF_FLAG_FD_NO_GROUP);

		if (block->perf_event_fd[i] < 0) {
			ret = HSAKMT_STATUS_ERROR;
			close_perf_event_fd(block);
			break;
		}
	}

	return ret;
#else
	return HSAKMT_STATUS_ERROR;
#endif
}

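/* perf_trace_ioctl - Apply cmd (PERF_EVENT_IOC_ENABLE or
 * PERF_EVENT_IOC_DISABLE) to every perf event FD of the block.
 */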
static HSAKMT_STATUS perf_trace_ioctl(struct perf_trace_block *block,
				      uint32_t cmd)
{
	uint32_t i;

	for (i = 0; i < block->num_counters; i++) {
		/* An FD of 0 means "closed or never opened" in this file */
		if (block->perf_event_fd[i] <= 0)
			return HSAKMT_STATUS_UNAVAILABLE;
		if (ioctl(block->perf_event_fd[i], cmd, NULL))
			return HSAKMT_STATUS_ERROR;
	}

	return HSAKMT_STATUS_SUCCESS;
}

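/* query_trace - Read the current value of one perf event FD into *buf.
 * The enabled/running times that follow the value are discarded.
 */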
static HSAKMT_STATUS query_trace(int fd, uint64_t *buf)
{
#ifdef __linux__
	struct perf_counts_values content;

	if (fd < 0)
		return HSAKMT_STATUS_ERROR;
	if (readn(fd, &content, sizeof(content)) != sizeof(content))
		return HSAKMT_STATUS_ERROR;

	*buf = content.val;
	return HSAKMT_STATUS_SUCCESS;
#else
	return HSAKMT_STATUS_ERROR;
#endif
}

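/* Returns the counter properties of a node, building and caching them
 * on first use.
 */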
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
						      HsaCounterProperties **CounterProperties)
{
#ifdef __linux__
	HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
	uint32_t gpu_id, i, block_id;
	uint32_t counter_props_size = 0;
	uint32_t total_counters = 0;
	uint32_t total_concurrent = 0;
	struct perf_counter_block block = {0};
	uint32_t total_blocks = 0;
	HsaCounterBlockProperties *block_prop;

	if (!counter_props)
		return HSAKMT_STATUS_NO_MEMORY;

	if (!CounterProperties)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	if (counter_props[NodeId]) {
		*CounterProperties = counter_props[NodeId];
		return HSAKMT_STATUS_SUCCESS;
	}

	for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
		rc = get_block_properties(NodeId, i, &block);
		if (rc != HSAKMT_STATUS_SUCCESS)
			return rc;
		total_concurrent += block.num_of_slots;
		total_counters += block.num_of_counters;
		/* If num_of_slots=0, this block doesn't exist */
		if (block.num_of_slots)
			total_blocks++;
	}

	counter_props_size = sizeof(HsaCounterProperties) +
			sizeof(HsaCounterBlockProperties) * (total_blocks - 1) +
			sizeof(HsaCounter) * (total_counters - total_blocks);

	counter_props[NodeId] = malloc(counter_props_size);
	if (!counter_props[NodeId])
		return HSAKMT_STATUS_NO_MEMORY;

	counter_props[NodeId]->NumBlocks = total_blocks;
	counter_props[NodeId]->NumConcurrent = total_concurrent;

	block_prop = &counter_props[NodeId]->Blocks[0];
	for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
		rc = get_block_properties(NodeId, block_id, &block);
		if (rc != HSAKMT_STATUS_SUCCESS) {
			free(counter_props[NodeId]);
			counter_props[NodeId] = NULL;
			return rc;
		}

		if (!block.num_of_slots) /* not a valid block */
			continue;

		blockid2uuid(block_id, &block_prop->BlockId);
		block_prop->NumCounters = block.num_of_counters;
		block_prop->NumConcurrent = block.num_of_slots;
		for (i = 0; i < block.num_of_counters; i++) {
			block_prop->Counters[i].BlockIndex = block_id;
			block_prop->Counters[i].CounterId = block.counter_ids[i];
			block_prop->Counters[i].CounterSizeInBits = block.counter_size_in_bits;
			block_prop->Counters[i].CounterMask = block.counter_mask;
			block_prop->Counters[i].Flags.ui32.Global = 1;
			if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2)
				block_prop->Counters[i].Type = HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE;
			else
				block_prop->Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
		}

		block_prop = (HsaCounterBlockProperties *)&block_prop->Counters[block_prop->NumCounters];
	}

	*CounterProperties = counter_props[NodeId];

	return HSAKMT_STATUS_SUCCESS;
#else
	return HSAKMT_STATUS_ERROR;
#endif
}

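/* Illustrative call sequence for the PMC entry points below (a sketch,
 * not code taken from a client; error handling omitted; node, n,
 * counters, props, buf and root are placeholders):
 *
 *	hsaKmtPmcGetCounterProperties(node, &props);
 *	hsaKmtPmcRegisterTrace(node, n, counters, &root);
 *	hsaKmtPmcAcquireTraceAccess(node, root.TraceId);
 *	hsaKmtPmcStartTrace(root.TraceId, buf, root.TraceBufferMinSizeBytes);
 *	hsaKmtPmcQueryTrace(root.TraceId);  (snapshot the counters into buf)
 *	hsaKmtPmcStopTrace(root.TraceId);
 *	hsaKmtPmcReleaseTraceAccess(node, root.TraceId);
 *	hsaKmtPmcUnregisterTrace(node, root.TraceId);
 */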
/* Registers a set of (HW) counters to be used for tracing/profiling */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
					       HSAuint32 NumberOfCounters,
					       HsaCounter *Counters,
					       HsaPmcTraceRoot *TraceRoot)
{
	uint32_t gpu_id, i, j;
	uint64_t min_buf_size = 0;
	struct perf_trace *trace = NULL;
	uint32_t concurrent_limit;
	const uint32_t MAX_COUNTERS = 512;
	uint64_t counter_id[PERFCOUNTER_BLOCKID__MAX][MAX_COUNTERS];
	uint32_t num_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
	uint32_t block, num_blocks = 0, total_counters = 0;
	uint64_t *counter_id_ptr;
	int *fd_ptr;

	pr_debug("[%s] Number of counters %u\n", __func__, NumberOfCounters);

	if (!counter_props)
		return HSAKMT_STATUS_NO_MEMORY;

	if (!Counters || !TraceRoot || NumberOfCounters == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	if (NumberOfCounters > MAX_COUNTERS) {
		pr_err("MAX_COUNTERS is too small for %u.\n",
			NumberOfCounters);
		return HSAKMT_STATUS_NO_MEMORY;
	}

	/* Calculate the minimum buffer size */
	for (i = 0; i < NumberOfCounters; i++) {
		if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
			return HSAKMT_STATUS_INVALID_PARAMETER;
		/* Only privileged counters need to register */
		if (Counters[i].Type > HSA_PROFILE_TYPE_PRIVILEGED_STREAMING)
			continue;
		min_buf_size += Counters[i].CounterSizeInBits/BITS_PER_BYTE;
		/* j: the first blank entry in the block to record counter_id */
		j = num_counters[Counters[i].BlockIndex];
		counter_id[Counters[i].BlockIndex][j] = Counters[i].CounterId;
		num_counters[Counters[i].BlockIndex]++;
		total_counters++;
	}

	/* Verify that the number of counters per block is not larger than the
	 * number of slots.
	 */
	for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
		if (!num_counters[i])
			continue;
		concurrent_limit = get_block_concurrent_limit(NodeId, i);
		if (!concurrent_limit) {
			pr_err("Invalid block ID: %u\n", i);
			return HSAKMT_STATUS_INVALID_PARAMETER;
		}
		if (num_counters[i] > concurrent_limit) {
			pr_err("Counters exceed the limit.\n");
			return HSAKMT_STATUS_INVALID_PARAMETER;
		}
		num_blocks++;
	}

	if (!num_blocks)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Now we have sorted blocks/counters information in
	 * num_counters[block_id] and counter_id[block_id][]. Allocate trace
	 * and record the information.
	 */
	trace = (struct perf_trace *)calloc(sizeof(struct perf_trace)
			+ sizeof(struct perf_trace_block) * num_blocks
			+ sizeof(uint64_t) * total_counters
			+ sizeof(int) * total_counters,
			1);
	if (!trace)
		return HSAKMT_STATUS_NO_MEMORY;

	/* The allocated area is partitioned as:
	 * +---------------------------------+ trace
	 * |    perf_trace                   |
	 * |---------------------------------| trace->blocks[0]
	 * | perf_trace_block 0              |
	 * | ....                            |
	 * | perf_trace_block N-1            | trace->blocks[N-1]
	 * |---------------------------------| <-- counter_id_ptr starts here
	 * | block 0's counter IDs(uint64_t) |
	 * | ......                          |
	 * | block N-1's counter IDs         |
	 * |---------------------------------| <-- perf_event_fd starts here
	 * | block 0's perf_event_fds(int)   |
	 * | ......                          |
	 * | block N-1's perf_event_fds      |
	 * +---------------------------------+
	 */
	block = 0;
	counter_id_ptr = (uint64_t *)((char *)
			trace + sizeof(struct perf_trace)
			+ sizeof(struct perf_trace_block) * num_blocks);
	fd_ptr = (int *)(counter_id_ptr + total_counters);
	/* Fill in each block's information */
	for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
		if (!num_counters[i]) /* not a block to trace */
			continue;
		/* Following perf_trace + perf_trace_block x N are the
		 * counter_id arrays. Assign the counter_id array belonging to
		 * this block.
		 */
		trace->blocks[block].counter_id = counter_id_ptr;
		/* Fill in counter IDs to the counter_id array. */
		for (j = 0; j < num_counters[i]; j++)
			trace->blocks[block].counter_id[j] = counter_id[i][j];
		trace->blocks[block].perf_event_fd = fd_ptr;
		/* how many counters to trace */
		trace->blocks[block].num_counters = num_counters[i];
		/* block index in "enum perf_block_id" */
		trace->blocks[block].block_id = i;
		block++; /* move to the next block */
		counter_id_ptr += num_counters[i];
		fd_ptr += num_counters[i];
	}

	trace->magic4cc = HSA_PERF_MAGIC4CC;
	trace->gpu_id = gpu_id;
	trace->state = PERF_TRACE_STATE__STOPPED;
	trace->num_blocks = num_blocks;

	TraceRoot->NumberOfPasses = 1;
	TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
	TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);

	return HSAKMT_STATUS_SUCCESS;
}

/* Unregisters a set of (HW) counters used for tracing/profiling */

HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
						 HSATraceId TraceId)
{
	uint32_t gpu_id;
	struct perf_trace *trace;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (trace->gpu_id != gpu_id)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* If the trace is in the running state, stop it */
	if (trace->state == PERF_TRACE_STATE__STARTED) {
		HSAKMT_STATUS status = hsaKmtPmcStopTrace(TraceId);

		if (status != HSAKMT_STATUS_SUCCESS)
			return status;
	}

	free(trace);

	return HSAKMT_STATUS_SUCCESS;
}

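/* Acquires trace access: reserves the shared concurrent slots for each
 * block in the trace and opens its perf event FDs. On failure, every
 * reservation and FD acquired so far is rolled back.
 */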
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
						    HSATraceId TraceId)
{
	struct perf_trace *trace;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	uint32_t gpu_id, i;
	int j;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	for (i = 0; i < trace->num_blocks; i++) {
		ret = update_block_slots(PERF_TRACE_ACTION__ACQUIRE,
					trace->blocks[i].block_id,
					trace->blocks[i].num_counters);
		if (ret != HSAKMT_STATUS_SUCCESS)
			goto out;
		ret = open_perf_event_fd(&trace->blocks[i]);
		if (ret != HSAKMT_STATUS_SUCCESS) {
			i++; /* so the loop below releases the slots just reserved */
			goto out;
		}
	}

out:
	if (ret != HSAKMT_STATUS_SUCCESS) {
		for (j = i - 1; j >= 0; j--) {
			update_block_slots(PERF_TRACE_ACTION__RELEASE,
					trace->blocks[j].block_id,
					trace->blocks[j].num_counters);
			close_perf_event_fd(&trace->blocks[j]);
		}
	}

	return ret;
}

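/* Releases trace access: returns the shared slots and closes the perf
 * event FDs of every block in the trace.
 */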
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(HSAuint32 NodeId,
						    HSATraceId TraceId)
{
	struct perf_trace *trace;
	uint32_t i;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	for (i = 0; i < trace->num_blocks; i++) {
		update_block_slots(PERF_TRACE_ACTION__RELEASE,
				trace->blocks[i].block_id,
				trace->blocks[i].num_counters);
		close_perf_event_fd(&trace->blocks[i]);
	}

	return HSAKMT_STATUS_SUCCESS;
}


/* Starts tracing operation on a previously established set of performance counters */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStartTrace(HSATraceId TraceId,
					    void *TraceBuffer,
					    HSAuint64 TraceBufferSizeBytes)
{
#ifdef __linux__
	struct perf_trace *trace =
			(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
	uint32_t i;
	int32_t j;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (TraceId == 0 || !TraceBuffer || TraceBufferSizeBytes == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	for (i = 0; i < trace->num_blocks; i++) {
		ret = perf_trace_ioctl(&trace->blocks[i],
					PERF_EVENT_IOC_ENABLE);
		if (ret != HSAKMT_STATUS_SUCCESS)
			break;
	}
	if (ret != HSAKMT_STATUS_SUCCESS) {
		/* Disable enabled blocks before returning the failure. */
		j = (int32_t)i;
		while (--j >= 0)
			perf_trace_ioctl(&trace->blocks[j],
					PERF_EVENT_IOC_DISABLE);
		return ret;
	}

	trace->state = PERF_TRACE_STATE__STARTED;
	trace->buf = TraceBuffer;
	trace->buf_size = TraceBufferSizeBytes;

	return HSAKMT_STATUS_SUCCESS;
#else
	return HSAKMT_STATUS_ERROR;
#endif
}


/* Forces an update of all the counters that a previously started trace operation has registered */

HSAKMT_STATUS HSAKMTAPI hsaKmtPmcQueryTrace(HSATraceId TraceId)
{
	struct perf_trace *trace =
			(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
	uint32_t i, j;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	uint64_t *buf;
	uint64_t buf_filled = 0;

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	buf = (uint64_t *)trace->buf;
	pr_debug("[%s] Trace buffer(%p): ", __func__, buf);
	for (i = 0; i < trace->num_blocks; i++)
		for (j = 0; j < trace->blocks[i].num_counters; j++) {
			buf_filled += sizeof(uint64_t);
			if (buf_filled > trace->buf_size)
				return HSAKMT_STATUS_NO_MEMORY;
			ret = query_trace(trace->blocks[i].perf_event_fd[j],
					buf);
			if (ret != HSAKMT_STATUS_SUCCESS)
				return ret;
			pr_debug("%lu_", *buf);
			buf++;
		}
	pr_debug("\n");

	return HSAKMT_STATUS_SUCCESS;
}


/* Stops tracing operation on a previously established set of performance counters */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStopTrace(HSATraceId TraceId)
{
#ifdef __linux__
	struct perf_trace *trace =
			(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
	uint32_t i;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	for (i = 0; i < trace->num_blocks; i++) {
		ret = perf_trace_ioctl(&trace->blocks[i],
					PERF_EVENT_IOC_DISABLE);
		if (ret != HSAKMT_STATUS_SUCCESS)
			return ret;
	}

	trace->state = PERF_TRACE_STATE__STOPPED;

	return ret;
#else
	return HSAKMT_STATUS_ERROR;
#endif
}