1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "si_build_pm4.h"
26 #include "si_query.h"
27 #include "util/u_memory.h"
28
29 #include "ac_perfcounter.h"
30
/* One (hardware block, shader engine, instance) group within a batch query:
 * the set of counter selectors that are programmed together on that block. */
struct si_query_group {
struct si_query_group *next;
struct ac_pc_block *block;
unsigned sub_gid; /* only used during init */
unsigned result_base; /* only used during init */
int se;       /* shader engine index, or -1 for broadcast to all SEs */
int instance; /* block instance index, or -1 for broadcast to all instances */
unsigned num_counters;
unsigned selectors[AC_QUERY_MAX_COUNTERS];
};
41
/* Maps one user-requested counter to its slot(s) in the result buffer. */
struct si_query_counter {
unsigned base;   /* index of the first 64-bit result word */
unsigned qwords; /* number of words summed (one per read-back SE/instance) */
unsigned stride; /* in uint64s */
};
47
/* A batch performance-counter query: a list of counter groups plus the
 * GPU buffer the counter values are read back into. */
struct si_query_pc {
struct si_query b;
struct si_query_buffer buffer;

/* Size of the results in memory, in bytes. */
unsigned result_size;

unsigned shaders; /* SQ shader-stage mask; 0 = not used */
unsigned num_counters;
struct si_query_counter *counters; /* one entry per user-supplied query type */
struct si_query_group *groups;     /* singly-linked list of counter groups */
};
60
si_pc_emit_instance(struct si_context * sctx,int se,int instance)61 static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
62 {
63 struct radeon_cmdbuf *cs = &sctx->gfx_cs;
64 unsigned value = S_030800_SH_BROADCAST_WRITES(1);
65
66 if (se >= 0) {
67 value |= S_030800_SE_INDEX(se);
68 } else {
69 value |= S_030800_SE_BROADCAST_WRITES(1);
70 }
71
72 if (sctx->chip_class >= GFX10) {
73 /* TODO: Expose counters from each shader array separately if needed. */
74 value |= S_030800_SA_BROADCAST_WRITES(1);
75 }
76
77 if (instance >= 0) {
78 value |= S_030800_INSTANCE_INDEX(instance);
79 } else {
80 value |= S_030800_INSTANCE_BROADCAST_WRITES(1);
81 }
82
83 radeon_begin(cs);
84 radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, value);
85 radeon_end();
86 }
87
/* Program the SQ shader-stage mask used to window shader-related counters.
 * Writes SQ_PERFCOUNTER_CTRL (low 7 bits of \p shaders) and the following
 * register with an all-ones mask. */
static void si_pc_emit_shaders(struct si_context *sctx, unsigned shaders)
{
struct radeon_cmdbuf *cs = &sctx->gfx_cs;

radeon_begin(cs);
radeon_set_uconfig_reg_seq(R_036780_SQ_PERFCOUNTER_CTRL, 2, false);
radeon_emit(shaders & 0x7f);
radeon_emit(0xffffffff);
radeon_end();
}
98
/* Program the per-counter select registers of one block: one selector per
 * active counter, and zero for all SPM (streaming perfmon) selectors so
 * they don't interfere with the sampled counters. */
static void si_pc_emit_select(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
                              unsigned *selectors)
{
struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
unsigned idx;

assert(count <= regs->num_counters);

/* Fake counters have no select registers; nothing to program. */
if (!regs->select0)
return;

radeon_begin(cs);

for (idx = 0; idx < count; ++idx) {
radeon_set_uconfig_reg_seq(regs->select0[idx], 1, false);
radeon_emit(selectors[idx] | regs->select_or);
}

/* Disable this block's SPM counters. */
for (idx = 0; idx < regs->num_spm_counters; idx++) {
radeon_set_uconfig_reg_seq(regs->select1[idx], 1, false);
radeon_emit(0);
}

radeon_end();
}
126
/* Emit the PM4 sequence that resets and starts all selected counters.
 * Also writes the immediate value 1 to \p va: si_pc_emit_stop later
 * overwrites that dword with 0 at bottom-of-pipe and waits on it, so it
 * doubles as a completion fence for the sampling interval. */
static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
{
struct radeon_cmdbuf *cs = &sctx->gfx_cs;

si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address,
                COPY_DATA_IMM, NULL, 1);

radeon_begin(cs);
/* Reset the counters first, then kick off counting. */
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
                       S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
                       S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
radeon_end();
}
143
/* Stop and sample the counters.
 *
 * A bottom-of-pipe release writes 0 to \p va (the dword si_pc_emit_start
 * set to 1), and the CP then waits until it reads 0, guaranteeing all prior
 * work has drained before the counters are sampled.
 *
 * Note: The buffer was already added in si_pc_emit_start, so we don't have to
 * do it again in here. */
static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
{
struct radeon_cmdbuf *cs = &sctx->gfx_cs;

si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
                  EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY);
si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);

radeon_begin(cs);
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
/* Some chips hang if SQ counters are stopped; keep them counting there. */
radeon_set_uconfig_reg(
   R_036020_CP_PERFMON_CNTL,
   S_036020_PERFMON_STATE(sctx->screen->info.never_stop_sq_perf_counters ?
                             V_036020_CP_PERFMON_STATE_START_COUNTING :
                             V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
      S_036020_PERFMON_SAMPLE_ENABLE(1));
radeon_end();
}
167
/* Copy the sampled values of \p count counters of \p block to memory at
 * \p va, one 64-bit slot per counter, using COPY_DATA from the perf
 * register space. GRBM_GFX_INDEX must already target the desired
 * SE/instance (see si_pc_emit_instance). */
static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
                            uint64_t va)
{
struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
unsigned idx;
unsigned reg = regs->counter0_lo;
unsigned reg_delta = 8; /* counter registers are lo/hi pairs, 8 bytes apart */

radeon_begin(cs);

if (regs->select0) {
for (idx = 0; idx < count; ++idx) {
/* Some blocks have irregularly spaced counter registers. */
if (regs->counters)
reg = regs->counters[idx];

radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
            COPY_DATA_COUNT_SEL); /* 64 bits */
radeon_emit(reg >> 2);
radeon_emit(0); /* unused */
radeon_emit(va);
radeon_emit(va >> 32);
va += sizeof(uint64_t);
reg += reg_delta;
}
} else {
/* Fake counters: write an immediate 0 per slot so the layout matches. */
for (idx = 0; idx < count; ++idx) {
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
            COPY_DATA_COUNT_SEL);
radeon_emit(0); /* immediate */
radeon_emit(0);
radeon_emit(va);
radeon_emit(va >> 32);
va += sizeof(uint64_t);
}
}
radeon_end();
}
209
si_pc_query_destroy(struct si_context * sctx,struct si_query * squery)210 static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery)
211 {
212 struct si_query_pc *query = (struct si_query_pc *)squery;
213
214 while (query->groups) {
215 struct si_query_group *group = query->groups;
216 query->groups = group->next;
217 FREE(group);
218 }
219
220 FREE(query->counters);
221
222 si_query_buffer_destroy(sctx->screen, &query->buffer);
223 FREE(query);
224 }
225
/* Enable/disable RLC perfmon clock gating inhibition; counters are only
 * reliable while gating is inhibited. No-op on chips before GFX8.
 *
 * NOTE(review): the \p cs parameter is unused — radeon_begin is passed
 * &sctx->gfx_cs directly. Presumably callers always pass the gfx CS;
 * confirm before relying on \p cs here. */
void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit)
{
radeon_begin(&sctx->gfx_cs);

if (sctx->chip_class >= GFX10) {
radeon_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL,
                       S_037390_PERFMON_CLOCK_STATE(inhibit));
} else if (sctx->chip_class >= GFX8) {
radeon_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL,
                       S_0372FC_PERFMON_CLOCK_STATE(inhibit));
}
radeon_end();
}
239
/* (Re-)start all counters of a batch query: allocate result memory,
 * program the shader mask and per-group selectors, then emit the perfmon
 * start sequence. Called from query_begin and when the query is resumed
 * after a command-stream flush. Silently bails if the result buffer
 * allocation fails (get_result then reports failure via a NULL buffer). */
static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery)
{
struct si_query_pc *query = (struct si_query_pc *)squery;
int current_se = -1;
int current_instance = -1;

if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size))
return;
si_need_gfx_cs_space(sctx, 0);

if (query->shaders)
si_pc_emit_shaders(sctx, query->shaders);

/* Counting is unreliable with clock gating active. */
si_inhibit_clockgating(sctx, &sctx->gfx_cs, true);

for (struct si_query_group *group = query->groups; group; group = group->next) {
struct ac_pc_block *block = group->block;

/* Only reprogram GRBM_GFX_INDEX when the target SE/instance changes. */
if (group->se != current_se || group->instance != current_instance) {
current_se = group->se;
current_instance = group->instance;
si_pc_emit_instance(sctx, group->se, group->instance);
}

si_pc_emit_select(sctx, block, group->num_counters, group->selectors);
}

/* Restore broadcast mode if we ever selected a specific SE/instance. */
if (current_se != -1 || current_instance != -1)
si_pc_emit_instance(sctx, -1, -1);

uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
si_pc_emit_start(sctx, query->buffer.buf, va);
}
276
/* Stop the counters and read all results back into the current result
 * slot. For broadcast groups (se/instance < 0) the read loop iterates
 * over every SE and/or instance explicitly, since reads cannot be
 * broadcast the way register writes can. */
static void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery)
{
struct si_query_pc *query = (struct si_query_pc *)squery;

if (!query->buffer.buf)
return;

uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
query->buffer.results_end += query->result_size;

si_pc_emit_stop(sctx, query->buffer.buf, va);

for (struct si_query_group *group = query->groups; group; group = group->next) {
struct ac_pc_block *block = group->block;
unsigned se = group->se >= 0 ? group->se : 0;
unsigned se_end = se + 1;

/* Per-SE block with broadcast group: read every SE. */
if ((block->b->b->flags & AC_PC_BLOCK_SE) && (group->se < 0))
se_end = sctx->screen->info.max_se;

do {
unsigned instance = group->instance >= 0 ? group->instance : 0;

/* Read each instance; the result layout matches what
 * si_create_batch_query computed (instances * num_counters). */
do {
si_pc_emit_instance(sctx, se, instance);
si_pc_emit_read(sctx, block, group->num_counters, va);
va += sizeof(uint64_t) * group->num_counters;
} while (group->instance < 0 && ++instance < block->num_instances);
} while (++se < se_end);
}

/* Back to broadcast mode for subsequent state emission. */
si_pc_emit_instance(sctx, -1, -1);

si_inhibit_clockgating(sctx, &sctx->gfx_cs, false);
}
312
si_pc_query_begin(struct si_context * ctx,struct si_query * squery)313 static bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery)
314 {
315 struct si_query_pc *query = (struct si_query_pc *)squery;
316
317 si_query_buffer_reset(ctx, &query->buffer);
318
319 list_addtail(&query->b.active_list, &ctx->active_queries);
320 ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
321
322 si_pc_query_resume(ctx, squery);
323
324 return true;
325 }
326
si_pc_query_end(struct si_context * ctx,struct si_query * squery)327 static bool si_pc_query_end(struct si_context *ctx, struct si_query *squery)
328 {
329 struct si_query_pc *query = (struct si_query_pc *)squery;
330
331 si_pc_query_suspend(ctx, squery);
332
333 list_del(&squery->active_list);
334 ctx->num_cs_dw_queries_suspend -= squery->num_cs_dw_suspend;
335
336 return query->buffer.buf != NULL;
337 }
338
/* Accumulate one result slot (result_size bytes written by
 * si_pc_emit_read) into the user-visible per-counter totals.
 *
 * NOTE(review): only the low 32 bits of each 64-bit slot are summed;
 * presumably the upper half of the copied register pair is not a valid
 * counter value — confirm against hardware docs before widening. */
static void si_pc_query_add_result(struct si_query_pc *query, void *buffer,
                                   union pipe_query_result *result)
{
uint64_t *results = buffer;
unsigned i, j;

for (i = 0; i < query->num_counters; ++i) {
struct si_query_counter *counter = &query->counters[i];

/* Sum the counter over all SEs/instances it was read from. */
for (j = 0; j < counter->qwords; ++j) {
uint32_t value = results[counter->base + j * counter->stride];
result->batch[i].u64 += value;
}
}
}
354
/* Read back the results of a batch query, summing every result slot of
 * every buffer (the query may have been suspended/resumed several times)
 * into \p result. Returns false if a buffer could not be mapped (e.g.
 * would block and \p wait is false). */
static bool si_pc_query_get_result(struct si_context *sctx, struct si_query *squery, bool wait,
                                   union pipe_query_result *result)
{
struct si_query_pc *query = (struct si_query_pc *)squery;

memset(result, 0, sizeof(result->batch[0]) * query->num_counters);

for (struct si_query_buffer *qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
unsigned results_base = 0;
void *map;

/* If already flushed, map directly; otherwise go through the driver
 * path, which can flush/sync as needed. */
if (squery->b.flushed)
map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
else
map = si_buffer_map(sctx, qbuf->buf, usage);

if (!map)
return false;

while (results_base != qbuf->results_end) {
si_pc_query_add_result(query, map + results_base, result);
results_base += query->result_size;
}
}

return true;
}
383
/* Virtual function table for perf-counter batch queries. */
static const struct si_query_ops batch_query_ops = {
.destroy = si_pc_query_destroy,
.begin = si_pc_query_begin,
.end = si_pc_query_end,
.get_result = si_pc_query_get_result,

.suspend = si_pc_query_suspend,
.resume = si_pc_query_resume,
};
393
/* Find or create the si_query_group of \p query for (\p block, \p sub_gid).
 *
 * \p sub_gid encodes (shader stage, SE, instance) as nested group indices;
 * it is decomposed here depending on the block's flags. Returns NULL on
 * allocation failure or incompatible shader-stage selection. */
static struct si_query_group *get_group_state(struct si_screen *screen, struct si_query_pc *query,
                                              struct ac_pc_block *block, unsigned sub_gid)
{
struct si_perfcounters *pc = screen->perfcounters;
struct si_query_group *group = query->groups;

/* Reuse an existing group if the same (block, sub_gid) was seen before. */
while (group) {
if (group->block == block && group->sub_gid == sub_gid)
return group;
group = group->next;
}

group = CALLOC_STRUCT(si_query_group);
if (!group)
return NULL;

group->block = block;
group->sub_gid = sub_gid;

if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
/* Outermost index is the shader stage; peel it off sub_gid. */
unsigned sub_gids = block->num_instances;
unsigned shader_id;
unsigned shaders;
unsigned query_shaders;

if (ac_pc_block_has_per_se_groups(&pc->base, block))
sub_gids = sub_gids * screen->info.max_se;
shader_id = sub_gid / sub_gids;
sub_gid = sub_gid % sub_gids;

shaders = ac_pc_shader_type_bits[shader_id];

/* There is only one global shader mask, so all shader-windowed
 * counters in one batch must agree on it. */
query_shaders = query->shaders & ~AC_PC_SHADERS_WINDOWING;
if (query_shaders && query_shaders != shaders) {
fprintf(stderr, "si_perfcounter: incompatible shader groups\n");
FREE(group);
return NULL;
}
query->shaders = shaders;
}

if (block->b->b->flags & AC_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
// A non-zero value in query->shaders ensures that the shader
// masking is reset unless the user explicitly requests one.
query->shaders = AC_PC_SHADERS_WINDOWING;
}

/* Next index level: shader engine (or broadcast). */
if (ac_pc_block_has_per_se_groups(&pc->base, block)) {
group->se = sub_gid / block->num_instances;
sub_gid = sub_gid % block->num_instances;
} else {
group->se = -1;
}

/* Innermost index level: instance (or broadcast). */
if (ac_pc_block_has_per_instance_groups(&pc->base, block)) {
group->instance = sub_gid;
} else {
group->instance = -1;
}

/* Prepend to the query's group list. */
group->next = query->groups;
query->groups = group;

return group;
}
459
si_create_batch_query(struct pipe_context * ctx,unsigned num_queries,unsigned * query_types)460 struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries,
461 unsigned *query_types)
462 {
463 struct si_screen *screen = (struct si_screen *)ctx->screen;
464 struct si_perfcounters *pc = screen->perfcounters;
465 struct ac_pc_block *block;
466 struct si_query_group *group;
467 struct si_query_pc *query;
468 unsigned base_gid, sub_gid, sub_index;
469 unsigned i, j;
470
471 if (!pc)
472 return NULL;
473
474 query = CALLOC_STRUCT(si_query_pc);
475 if (!query)
476 return NULL;
477
478 query->b.ops = &batch_query_ops;
479
480 query->num_counters = num_queries;
481
482 /* Collect selectors per group */
483 for (i = 0; i < num_queries; ++i) {
484 unsigned sub_gid;
485
486 if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER)
487 goto error;
488
489 block =
490 ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index);
491 if (!block)
492 goto error;
493
494 sub_gid = sub_index / block->b->selectors;
495 sub_index = sub_index % block->b->selectors;
496
497 group = get_group_state(screen, query, block, sub_gid);
498 if (!group)
499 goto error;
500
501 if (group->num_counters >= block->b->b->num_counters) {
502 fprintf(stderr, "perfcounter group %s: too many selected\n", block->b->b->name);
503 goto error;
504 }
505 group->selectors[group->num_counters] = sub_index;
506 ++group->num_counters;
507 }
508
509 /* Compute result bases and CS size per group */
510 query->b.num_cs_dw_suspend = pc->num_stop_cs_dwords;
511 query->b.num_cs_dw_suspend += pc->num_instance_cs_dwords;
512
513 i = 0;
514 for (group = query->groups; group; group = group->next) {
515 struct ac_pc_block *block = group->block;
516 unsigned read_dw;
517 unsigned instances = 1;
518
519 if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
520 instances = screen->info.max_se;
521 if (group->instance < 0)
522 instances *= block->num_instances;
523
524 group->result_base = i;
525 query->result_size += sizeof(uint64_t) * instances * group->num_counters;
526 i += instances * group->num_counters;
527
528 read_dw = 6 * group->num_counters;
529 query->b.num_cs_dw_suspend += instances * read_dw;
530 query->b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords;
531 }
532
533 if (query->shaders) {
534 if (query->shaders == AC_PC_SHADERS_WINDOWING)
535 query->shaders = 0xffffffff;
536 }
537
538 /* Map user-supplied query array to result indices */
539 query->counters = CALLOC(num_queries, sizeof(*query->counters));
540 for (i = 0; i < num_queries; ++i) {
541 struct si_query_counter *counter = &query->counters[i];
542 struct ac_pc_block *block;
543
544 block =
545 ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index);
546
547 sub_gid = sub_index / block->b->selectors;
548 sub_index = sub_index % block->b->selectors;
549
550 group = get_group_state(screen, query, block, sub_gid);
551 assert(group != NULL);
552
553 for (j = 0; j < group->num_counters; ++j) {
554 if (group->selectors[j] == sub_index)
555 break;
556 }
557
558 counter->base = group->result_base + j;
559 counter->stride = group->num_counters;
560
561 counter->qwords = 1;
562 if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
563 counter->qwords = screen->info.max_se;
564 if (group->instance < 0)
565 counter->qwords *= block->num_instances;
566 }
567
568 return (struct pipe_query *)query;
569
570 error:
571 si_pc_query_destroy((struct si_context *)ctx, &query->b);
572 return NULL;
573 }
574
/* Gallium driver-query enumeration hook for individual perf counters.
 * With \p info == NULL, returns the total number of exposed counters;
 * otherwise fills \p info for counter \p index and returns 1, or 0 if the
 * index is out of range or name generation failed. */
int si_get_perfcounter_info(struct si_screen *screen, unsigned index,
                            struct pipe_driver_query_info *info)
{
struct si_perfcounters *pc = screen->perfcounters;
struct ac_pc_block *block;
unsigned base_gid, sub;

if (!pc)
return 0;

if (!info) {
unsigned bid, num_queries = 0;

for (bid = 0; bid < pc->base.num_blocks; ++bid) {
num_queries += pc->base.blocks[bid].b->selectors * pc->base.blocks[bid].num_groups;
}

return num_queries;
}

block = ac_lookup_counter(&pc->base, index, &base_gid, &sub);
if (!block)
return 0;

/* Selector name strings are generated lazily on first use. */
if (!block->selector_names) {
if (!ac_init_block_names(&screen->info, &pc->base, block))
return 0;
}
info->name = block->selector_names + sub * block->selector_name_stride;
info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index;
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
info->group_id = base_gid + sub / block->b->selectors;
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
/* Only list the first counter of each block to keep HUD menus short. */
if (sub > 0 && sub + 1 < block->b->selectors * block->num_groups)
info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
return 1;
}
614
si_get_perfcounter_group_info(struct si_screen * screen,unsigned index,struct pipe_driver_query_group_info * info)615 int si_get_perfcounter_group_info(struct si_screen *screen, unsigned index,
616 struct pipe_driver_query_group_info *info)
617 {
618 struct si_perfcounters *pc = screen->perfcounters;
619 struct ac_pc_block *block;
620
621 if (!pc)
622 return 0;
623
624 if (!info)
625 return pc->base.num_groups;
626
627 block = ac_lookup_group(&pc->base, &index);
628 if (!block)
629 return 0;
630
631 if (!block->group_names) {
632 if (!ac_init_block_names(&screen->info, &pc->base, block))
633 return 0;
634 }
635 info->name = block->group_names + index * block->group_name_stride;
636 info->num_queries = block->b->selectors;
637 info->max_active_queries = block->b->b->num_counters;
638 return 1;
639 }
640
/* Tear down the screen's perfcounter state. Safe to call when counters
 * were never initialized (or initialization failed). */
void si_destroy_perfcounters(struct si_screen *screen)
{
struct si_perfcounters *pc = screen->perfcounters;

if (!pc)
return;

ac_destroy_perfcounters(&pc->base);
FREE(pc);
screen->perfcounters = NULL;
}
652
si_init_perfcounters(struct si_screen * screen)653 void si_init_perfcounters(struct si_screen *screen)
654 {
655 bool separate_se, separate_instance;
656
657 separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
658 separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
659
660 screen->perfcounters = CALLOC_STRUCT(si_perfcounters);
661 if (!screen->perfcounters)
662 return;
663
664 screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen);
665 screen->perfcounters->num_instance_cs_dwords = 3;
666
667 if (!ac_init_perfcounters(&screen->info, separate_se, separate_instance,
668 &screen->perfcounters->base)) {
669 si_destroy_perfcounters(screen);
670 }
671 }
672