1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include "ac_gpu_info.h"
26 #include "ac_perfcounter.h"
27 
28 #include "util/u_memory.h"
29 #include "macros.h"
30 
31 /* cik_CB */
32 static unsigned cik_CB_select0[] = {
33    R_037004_CB_PERFCOUNTER0_SELECT,
34    R_03700C_CB_PERFCOUNTER1_SELECT,
35    R_037010_CB_PERFCOUNTER2_SELECT,
36    R_037014_CB_PERFCOUNTER3_SELECT,
37 };
38 static unsigned cik_CB_select1[] = {
39    R_037008_CB_PERFCOUNTER0_SELECT1,
40 };
41 static struct ac_pc_block_base cik_CB = {
42    .gpu_block = CB,
43    .name = "CB",
44    .num_counters = 4,
45    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,
46 
47    .select0 = cik_CB_select0,
48    .select1 = cik_CB_select1,
49    .counter0_lo = R_035018_CB_PERFCOUNTER0_LO,
50 
51    .num_spm_counters = 1,
52    .num_spm_wires = 2,
53    .spm_block_select = 0x0,
54 };
55 
56 /* cik_CPC */
57 static unsigned cik_CPC_select0[] = {
58    R_036024_CPC_PERFCOUNTER0_SELECT,
59    R_03600C_CPC_PERFCOUNTER1_SELECT,
60 };
61 static unsigned cik_CPC_select1[] = {
62    R_036010_CPC_PERFCOUNTER0_SELECT1,
63 };
64 static unsigned cik_CPC_counters[] = {
65    R_034018_CPC_PERFCOUNTER0_LO,
66    R_034010_CPC_PERFCOUNTER1_LO,
67 };
68 static struct ac_pc_block_base cik_CPC = {
69    .gpu_block = CPC,
70    .name = "CPC",
71    .num_counters = 2,
72 
73    .select0 = cik_CPC_select0,
74    .select1 = cik_CPC_select1,
75    .counters = cik_CPC_counters,
76 
77    .num_spm_counters = 1,
78    .num_spm_wires = 2,
79    .spm_block_select = 0x1,
80 };
81 
82 /* cik_CPF */
83 static unsigned cik_CPF_select0[] = {
84    R_03601C_CPF_PERFCOUNTER0_SELECT,
85    R_036014_CPF_PERFCOUNTER1_SELECT,
86 };
87 static unsigned cik_CPF_select1[] = {
88    R_036018_CPF_PERFCOUNTER0_SELECT1,
89 };
90 static unsigned cik_CPF_counters[] = {
91    R_034028_CPF_PERFCOUNTER0_LO,
92    R_034020_CPF_PERFCOUNTER1_LO,
93 };
94 static struct ac_pc_block_base cik_CPF = {
95    .gpu_block = CPF,
96    .name = "CPF",
97    .num_counters = 2,
98 
99    .select0 = cik_CPF_select0,
100    .select1 = cik_CPF_select1,
101    .counters = cik_CPF_counters,
102 
103    .num_spm_counters = 1,
104    .num_spm_wires = 2,
105    .spm_block_select = 0x2,
106 };
107 
108 /* cik_CPG */
109 static unsigned cik_CPG_select0[] = {
110    R_036008_CPG_PERFCOUNTER0_SELECT,
111    R_036000_CPG_PERFCOUNTER1_SELECT,
112 };
113 static unsigned cik_CPG_select1[] = {
114    R_036004_CPG_PERFCOUNTER0_SELECT1
115 };
116 static unsigned cik_CPG_counters[] = {
117    R_034008_CPG_PERFCOUNTER0_LO,
118    R_034000_CPG_PERFCOUNTER1_LO,
119 };
120 static struct ac_pc_block_base cik_CPG = {
121    .gpu_block = CPG,
122    .name = "CPG",
123    .num_counters = 2,
124 
125    .select0 = cik_CPG_select0,
126    .select1 = cik_CPG_select1,
127    .counters = cik_CPG_counters,
128 
129    .num_spm_counters = 1,
130    .num_spm_wires = 2,
131    .spm_block_select = 0x0,
132 };
133 
134 /* cik_DB */
135 static unsigned cik_DB_select0[] = {
136    R_037100_DB_PERFCOUNTER0_SELECT,
137    R_037108_DB_PERFCOUNTER1_SELECT,
138    R_037110_DB_PERFCOUNTER2_SELECT,
139    R_037118_DB_PERFCOUNTER3_SELECT,
140 };
141 static unsigned cik_DB_select1[] = {
142    R_037104_DB_PERFCOUNTER0_SELECT1,
143    R_03710C_DB_PERFCOUNTER1_SELECT1,
144 };
145 static struct ac_pc_block_base cik_DB = {
146    .gpu_block = DB,
147    .name = "DB",
148    .num_counters = 4,
149    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,
150 
151    .select0 = cik_DB_select0,
152    .select1 = cik_DB_select1,
153    .counter0_lo = R_035100_DB_PERFCOUNTER0_LO,
154 
155    .num_spm_counters = 2,
156    .num_spm_wires = 3,
157    .spm_block_select = 0x1,
158 };
159 
160 /* cik_GDS */
161 static unsigned cik_GDS_select0[] = {
162    R_036A00_GDS_PERFCOUNTER0_SELECT,
163    R_036A04_GDS_PERFCOUNTER1_SELECT,
164    R_036A08_GDS_PERFCOUNTER2_SELECT,
165    R_036A0C_GDS_PERFCOUNTER3_SELECT,
166 };
167 static unsigned cik_GDS_select1[] = {
168    R_036A10_GDS_PERFCOUNTER0_SELECT1,
169 };
170 static struct ac_pc_block_base cik_GDS = {
171    .gpu_block = GDS,
172    .name = "GDS",
173    .num_counters = 4,
174 
175    .select0 = cik_GDS_select0,
176    .select1 = cik_GDS_select1,
177    .counter0_lo = R_034A00_GDS_PERFCOUNTER0_LO,
178 
179    .num_spm_counters = 1,
180    .num_spm_wires = 2,
181    .spm_block_select = 0x3,
182 };
183 
184 /* cik_GRBM */
185 static unsigned cik_GRBM_select0[] = {
186    R_036100_GRBM_PERFCOUNTER0_SELECT,
187    R_036104_GRBM_PERFCOUNTER1_SELECT,
188 };
189 static unsigned cik_GRBM_counters[] = {
190    R_034100_GRBM_PERFCOUNTER0_LO,
191    R_03410C_GRBM_PERFCOUNTER1_LO,
192 };
193 static struct ac_pc_block_base cik_GRBM = {
194    .gpu_block = GRBM,
195    .name = "GRBM",
196    .num_counters = 2,
197 
198    .select0 = cik_GRBM_select0,
199    .counters = cik_GRBM_counters,
200 };
201 
202 /* cik_GRBMSE */
203 static unsigned cik_GRBMSE_select0[] = {
204    R_036108_GRBM_SE0_PERFCOUNTER_SELECT,
205    R_03610C_GRBM_SE1_PERFCOUNTER_SELECT,
206    R_036110_GRBM_SE2_PERFCOUNTER_SELECT,
207    R_036114_GRBM_SE3_PERFCOUNTER_SELECT,
208 };
209 static struct ac_pc_block_base cik_GRBMSE = {
210    .gpu_block = GRBMSE,
211    .name = "GRBMSE",
212    .num_counters = 4,
213 
214    .select0 = cik_GRBMSE_select0,
215    .counter0_lo = R_034114_GRBM_SE0_PERFCOUNTER_LO,
216 };
217 
218 /* cik_IA */
219 static unsigned cik_IA_select0[] = {
220    R_036210_IA_PERFCOUNTER0_SELECT,
221    R_036214_IA_PERFCOUNTER1_SELECT,
222    R_036218_IA_PERFCOUNTER2_SELECT,
223    R_03621C_IA_PERFCOUNTER3_SELECT,
224 };
225 static unsigned cik_IA_select1[] = {
226    R_036220_IA_PERFCOUNTER0_SELECT1,
227 };
228 static struct ac_pc_block_base cik_IA = {
229    .gpu_block = IA,
230    .name = "IA",
231    .num_counters = 4,
232 
233    .select0 = cik_IA_select0,
234    .select1 = cik_IA_select1,
235    .counter0_lo = R_034220_IA_PERFCOUNTER0_LO,
236 
237    .num_spm_counters = 1,
238    .num_spm_wires = 2,
239    .spm_block_select = 0x6,
240 };
241 
242 /* cik_PA_SC */
243 static unsigned cik_PA_SC_select0[] = {
244    R_036500_PA_SC_PERFCOUNTER0_SELECT,
245    R_036508_PA_SC_PERFCOUNTER1_SELECT,
246    R_03650C_PA_SC_PERFCOUNTER2_SELECT,
247    R_036510_PA_SC_PERFCOUNTER3_SELECT,
248    R_036514_PA_SC_PERFCOUNTER4_SELECT,
249    R_036518_PA_SC_PERFCOUNTER5_SELECT,
250    R_03651C_PA_SC_PERFCOUNTER6_SELECT,
251    R_036520_PA_SC_PERFCOUNTER7_SELECT,
252 };
253 static unsigned cik_PA_SC_select1[] = {
254    R_036504_PA_SC_PERFCOUNTER0_SELECT1,
255 };
256 static struct ac_pc_block_base cik_PA_SC = {
257    .gpu_block = PA_SC,
258    .name = "PA_SC",
259    .num_counters = 8,
260    .flags = AC_PC_BLOCK_SE,
261 
262    .select0 = cik_PA_SC_select0,
263    .select1 = cik_PA_SC_select1,
264    .counter0_lo = R_034500_PA_SC_PERFCOUNTER0_LO,
265 
266    .num_spm_counters = 1,
267    .num_spm_wires = 2,
268    .spm_block_select = 0x4,
269 };
270 
271 /* cik_PA_SU */
272 static unsigned cik_PA_SU_select0[] = {
273    R_036400_PA_SU_PERFCOUNTER0_SELECT,
274    R_036408_PA_SU_PERFCOUNTER1_SELECT,
275    R_036410_PA_SU_PERFCOUNTER2_SELECT,
276    R_036414_PA_SU_PERFCOUNTER3_SELECT,
277 };
278 static unsigned cik_PA_SU_select1[] = {
279    R_036404_PA_SU_PERFCOUNTER0_SELECT1,
280    R_03640C_PA_SU_PERFCOUNTER1_SELECT1,
281 };
282 /* According to docs, PA_SU counters are only 48 bits wide. */
283 static struct ac_pc_block_base cik_PA_SU = {
284    .gpu_block = PA_SU,
285    .name = "PA_SU",
286    .num_counters = 4,
287    .flags = AC_PC_BLOCK_SE,
288 
289    .select0 = cik_PA_SU_select0,
290    .select1 = cik_PA_SU_select1,
291    .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,
292 
293    .num_spm_counters = 2,
294    .num_spm_wires = 3,
295    .spm_block_select = 0x2,
296 };
297 
298 /* cik_SPI */
299 static unsigned cik_SPI_select0[] = {
300    R_036600_SPI_PERFCOUNTER0_SELECT,
301    R_036604_SPI_PERFCOUNTER1_SELECT,
302    R_036608_SPI_PERFCOUNTER2_SELECT,
303    R_03660C_SPI_PERFCOUNTER3_SELECT,
304    R_036620_SPI_PERFCOUNTER4_SELECT,
305    R_036624_SPI_PERFCOUNTER5_SELECT,
306 };
307 static unsigned cik_SPI_select1[] = {
308    R_036610_SPI_PERFCOUNTER0_SELECT1,
309    R_036614_SPI_PERFCOUNTER1_SELECT1,
310    R_036618_SPI_PERFCOUNTER2_SELECT1,
311    R_03661C_SPI_PERFCOUNTER3_SELECT1
312 };
313 static struct ac_pc_block_base cik_SPI = {
314    .gpu_block = SPI,
315    .name = "SPI",
316    .num_counters = 6,
317    .flags = AC_PC_BLOCK_SE,
318 
319    .select0 = cik_SPI_select0,
320    .select1 = cik_SPI_select1,
321    .counter0_lo = R_034604_SPI_PERFCOUNTER0_LO,
322 
323    .num_spm_counters = 4,
324    .num_spm_wires = 8,
325    .spm_block_select = 0x8,
326 };
327 
328 /* cik_SQ */
329 static unsigned cik_SQ_select0[] = {
330    R_036700_SQ_PERFCOUNTER0_SELECT,
331    R_036704_SQ_PERFCOUNTER1_SELECT,
332    R_036708_SQ_PERFCOUNTER2_SELECT,
333    R_03670C_SQ_PERFCOUNTER3_SELECT,
334    R_036710_SQ_PERFCOUNTER4_SELECT,
335    R_036714_SQ_PERFCOUNTER5_SELECT,
336    R_036718_SQ_PERFCOUNTER6_SELECT,
337    R_03671C_SQ_PERFCOUNTER7_SELECT,
338    R_036720_SQ_PERFCOUNTER8_SELECT,
339    R_036724_SQ_PERFCOUNTER9_SELECT,
340    R_036728_SQ_PERFCOUNTER10_SELECT,
341    R_03672C_SQ_PERFCOUNTER11_SELECT,
342    R_036730_SQ_PERFCOUNTER12_SELECT,
343    R_036734_SQ_PERFCOUNTER13_SELECT,
344    R_036738_SQ_PERFCOUNTER14_SELECT,
345    R_03673C_SQ_PERFCOUNTER15_SELECT,
346 };
347 static struct ac_pc_block_base cik_SQ = {
348    .gpu_block = SQ,
349    .name = "SQ",
350    .num_counters = 16,
351    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,
352 
353    .select0 = cik_SQ_select0,
354    .select_or = S_036700_SQC_BANK_MASK(15) | S_036700_SQC_CLIENT_MASK(15) | S_036700_SIMD_MASK(15),
355    .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,
356 
357    .num_spm_wires = 8,
358    .spm_block_select = 0x9,
359 };
360 
361 /* cik_SX */
362 static unsigned cik_SX_select0[] = {
363    R_036900_SX_PERFCOUNTER0_SELECT,
364    R_036904_SX_PERFCOUNTER1_SELECT,
365    R_036908_SX_PERFCOUNTER2_SELECT,
366    R_03690C_SX_PERFCOUNTER3_SELECT,
367 };
368 static unsigned cik_SX_select1[] = {
369    R_036910_SX_PERFCOUNTER0_SELECT1,
370    R_036914_SX_PERFCOUNTER1_SELECT1,
371 };
372 static struct ac_pc_block_base cik_SX = {
373    .gpu_block = SX,
374    .name = "SX",
375    .num_counters = 4,
376    .flags = AC_PC_BLOCK_SE,
377 
378    .select0 = cik_SX_select0,
379    .select1 = cik_SX_select1,
380    .counter0_lo = R_034900_SX_PERFCOUNTER0_LO,
381 
382    .num_spm_counters = 2,
383    .num_spm_wires = 4,
384    .spm_block_select = 0x3,
385 };
386 
387 /* cik_TA */
388 static unsigned cik_TA_select0[] = {
389    R_036B00_TA_PERFCOUNTER0_SELECT,
390    R_036B08_TA_PERFCOUNTER1_SELECT,
391 };
392 static unsigned cik_TA_select1[] = {
393    R_036B04_TA_PERFCOUNTER0_SELECT1,
394 };
395 static struct ac_pc_block_base cik_TA = {
396    .gpu_block = TA,
397    .name = "TA",
398    .num_counters = 2,
399    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,
400 
401    .select0 = cik_TA_select0,
402    .select1 = cik_TA_select1,
403    .counter0_lo = R_034B00_TA_PERFCOUNTER0_LO,
404 
405    .num_spm_counters = 1,
406    .num_spm_wires = 2,
407    .spm_block_select = 0x5,
408 };
409 
410 /* cik_TD */
411 static unsigned cik_TD_select0[] = {
412    R_036C00_TD_PERFCOUNTER0_SELECT,
413    R_036C08_TD_PERFCOUNTER1_SELECT,
414 };
415 static unsigned cik_TD_select1[] = {
416    R_036C04_TD_PERFCOUNTER0_SELECT1,
417 };
418 static struct ac_pc_block_base cik_TD = {
419    .gpu_block = TD,
420    .name = "TD",
421    .num_counters = 2,
422    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,
423 
424    .select0 = cik_TD_select0,
425    .select1 = cik_TD_select1,
426    .counter0_lo = R_034C00_TD_PERFCOUNTER0_LO,
427 
428    .num_spm_counters = 1,
429    .num_spm_wires = 2,
430    .spm_block_select = 0x6,
431 };
432 
433 /* cik_TCA */
434 static unsigned cik_TCA_select0[] = {
435    R_036E40_TCA_PERFCOUNTER0_SELECT,
436    R_036E48_TCA_PERFCOUNTER1_SELECT,
437    R_036E50_TCA_PERFCOUNTER2_SELECT,
438    R_036E54_TCA_PERFCOUNTER3_SELECT,
439 };
440 static unsigned cik_TCA_select1[] = {
441    R_036E44_TCA_PERFCOUNTER0_SELECT1,
442    R_036E4C_TCA_PERFCOUNTER1_SELECT1,
443 };
444 static struct ac_pc_block_base cik_TCA = {
445    .gpu_block = TCA,
446    .name = "TCA",
447    .num_counters = 4,
448    .flags = AC_PC_BLOCK_INSTANCE_GROUPS,
449 
450    .select0 = cik_TCA_select0,
451    .select1 = cik_TCA_select1,
452    .counter0_lo = R_034E40_TCA_PERFCOUNTER0_LO,
453 
454    .num_spm_counters = 2,
455    .num_spm_wires = 4,
456    .spm_block_select = 0x5,
457 };
458 
459 /* cik_TCC */
460 static unsigned cik_TCC_select0[] = {
461    R_036E00_TCC_PERFCOUNTER0_SELECT,
462    R_036E08_TCC_PERFCOUNTER1_SELECT,
463    R_036E10_TCC_PERFCOUNTER2_SELECT,
464    R_036E14_TCC_PERFCOUNTER3_SELECT,
465 };
466 static unsigned cik_TCC_select1[] = {
467    R_036E04_TCC_PERFCOUNTER0_SELECT1,
468    R_036E0C_TCC_PERFCOUNTER1_SELECT1,
469 };
470 static struct ac_pc_block_base cik_TCC = {
471    .gpu_block = TCC,
472    .name = "TCC",
473    .num_counters = 4,
474    .flags = AC_PC_BLOCK_INSTANCE_GROUPS,
475 
476    .select0 = cik_TCC_select0,
477    .select1 = cik_TCC_select1,
478    .counter0_lo = R_034E00_TCC_PERFCOUNTER0_LO,
479 
480    .num_spm_counters = 2,
481    .num_spm_wires = 4,
482    .spm_block_select = 0x4,
483 };
484 
485 /* cik_TCP */
486 static unsigned cik_TCP_select0[] = {
487    R_036D00_TCP_PERFCOUNTER0_SELECT,
488    R_036D08_TCP_PERFCOUNTER1_SELECT,
489    R_036D10_TCP_PERFCOUNTER2_SELECT,
490    R_036D14_TCP_PERFCOUNTER3_SELECT,
491 };
492 static unsigned cik_TCP_select1[] = {
493    R_036D04_TCP_PERFCOUNTER0_SELECT1,
494    R_036D0C_TCP_PERFCOUNTER1_SELECT1,
495 };
496 static struct ac_pc_block_base cik_TCP = {
497    .gpu_block = TCP,
498    .name = "TCP",
499    .num_counters = 4,
500    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,
501 
502    .select0 = cik_TCP_select0,
503    .select1 = cik_TCP_select1,
504    .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,
505 
506    .num_spm_counters = 2,
507    .num_spm_wires = 3,
508    .spm_block_select = 0x7,
509 };
510 
511 /* cik_VGT */
512 static unsigned cik_VGT_select0[] = {
513    R_036230_VGT_PERFCOUNTER0_SELECT,
514    R_036234_VGT_PERFCOUNTER1_SELECT,
515    R_036238_VGT_PERFCOUNTER2_SELECT,
516    R_03623C_VGT_PERFCOUNTER3_SELECT,
517 };
518 static unsigned cik_VGT_select1[] = {
519    R_036240_VGT_PERFCOUNTER0_SELECT1,
520    R_036244_VGT_PERFCOUNTER1_SELECT1,
521 };
522 static struct ac_pc_block_base cik_VGT = {
523    .gpu_block = VGT,
524    .name = "VGT",
525    .num_counters = 4,
526    .flags = AC_PC_BLOCK_SE,
527 
528    .select0 = cik_VGT_select0,
529    .select1 = cik_VGT_select1,
530    .counter0_lo = R_034240_VGT_PERFCOUNTER0_LO,
531 
532    .num_spm_counters = 2,
533    .num_spm_wires = 3,
534    .spm_block_select = 0xa,
535 };
536 
537 /* cik_WD */
538 static unsigned cik_WD_select0[] = {
539    R_036200_WD_PERFCOUNTER0_SELECT,
540    R_036204_WD_PERFCOUNTER1_SELECT,
541    R_036208_WD_PERFCOUNTER2_SELECT,
542    R_03620C_WD_PERFCOUNTER3_SELECT,
543 };
544 static struct ac_pc_block_base cik_WD = {
545    .gpu_block = WD,
546    .name = "WD",
547    .num_counters = 4,
548 
549    .select0 = cik_WD_select0,
550    .counter0_lo = R_034200_WD_PERFCOUNTER0_LO,
551 };
552 
553 /* cik_MC */
554 static struct ac_pc_block_base cik_MC = {
555    .gpu_block = MC,
556    .name = "MC",
557    .num_counters = 4,
558 };
559 
560 /* cik_SRBM */
561 static struct ac_pc_block_base cik_SRBM = {
562    .gpu_block = SRBM,
563    .name = "SRBM",
564    .num_counters = 2,
565 };
566 
567 /* gfx10_CHA */
568 static unsigned gfx10_CHA_select0[] = {
569    R_037780_CHA_PERFCOUNTER0_SELECT,
570    R_037788_CHA_PERFCOUNTER1_SELECT,
571    R_03778C_CHA_PERFCOUNTER2_SELECT,
572    R_037790_CHA_PERFCOUNTER3_SELECT,
573 };
574 static unsigned gfx10_CHA_select1[] = {
575    R_037784_CHA_PERFCOUNTER0_SELECT1,
576 };
577 static struct ac_pc_block_base gfx10_CHA = {
578    .gpu_block = CHA,
579    .name = "CHA",
580    .num_counters = 4,
581 
582    .select0 = gfx10_CHA_select0,
583    .select1 = gfx10_CHA_select1,
584    .counter0_lo = R_035800_CHA_PERFCOUNTER0_LO,
585 
586    .num_spm_counters = 1,
587    .num_spm_wires = 2,
588    .spm_block_select = 0xc,
589 };
590 
591 /* gfx10_CHCG */
592 static unsigned gfx10_CHCG_select0[] = {
593    R_036F18_CHCG_PERFCOUNTER0_SELECT,
594    R_036F20_CHCG_PERFCOUNTER1_SELECT,
595    R_036F24_CHCG_PERFCOUNTER2_SELECT,
596    R_036F28_CHCG_PERFCOUNTER3_SELECT,
597 };
598 static unsigned gfx10_CHCG_select1[] = {
599    R_036F1C_CHCG_PERFCOUNTER0_SELECT1,
600 };
601 static struct ac_pc_block_base gfx10_CHCG = {
602    .gpu_block = CHCG,
603    .name = "CHCG",
604    .num_counters = 4,
605 
606    .select0 = gfx10_CHCG_select0,
607    .select1 = gfx10_CHCG_select1,
608    .counter0_lo = R_034F20_CHCG_PERFCOUNTER0_LO,
609 
610    .num_spm_counters = 1,
611    .num_spm_wires = 2,
612    .spm_block_select = 0xe,
613 };
614 
615 /* gfx10_CHC */
616 static unsigned gfx10_CHC_select0[] = {
617    R_036F00_CHC_PERFCOUNTER0_SELECT,
618    R_036F08_CHC_PERFCOUNTER1_SELECT,
619    R_036F0C_CHC_PERFCOUNTER2_SELECT,
620    R_036F10_CHC_PERFCOUNTER3_SELECT,
621 };
622 static unsigned gfx10_CHC_select1[] = {
623    R_036F04_CHC_PERFCOUNTER0_SELECT1,
624 };
625 static struct ac_pc_block_base gfx10_CHC = {
626    .gpu_block = CHC,
627    .name = "CHC",
628    .num_counters = 4,
629 
630    .select0 = gfx10_CHC_select0,
631    .select1 = gfx10_CHC_select1,
632    .counter0_lo = R_034F00_CHC_PERFCOUNTER0_LO,
633 
634    .num_spm_counters = 1,
635    .num_spm_wires = 2,
636    .spm_block_select = 0xd,
637 };
638 
639 /* gfx10_DB */
640 static struct ac_pc_block_base gfx10_DB = {
641    .gpu_block = DB,
642    .name = "DB",
643    .num_counters = 4,
644    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,
645 
646    .select0 = cik_DB_select0,
647    .select1 = cik_DB_select1,
648    .counter0_lo = R_035100_DB_PERFCOUNTER0_LO,
649 
650    .num_spm_counters = 2,
651    .num_spm_wires = 4,
652    .spm_block_select = 0x1,
653 };
654 
655 /* gfx10_GCR */
656 static unsigned gfx10_GCR_select0[] = {
657    R_037580_GCR_PERFCOUNTER0_SELECT,
658    R_037588_GCR_PERFCOUNTER1_SELECT,
659 };
660 static unsigned gfx10_GCR_select1[] = {
661    R_037584_GCR_PERFCOUNTER0_SELECT1,
662 };
663 static struct ac_pc_block_base gfx10_GCR = {
664    .gpu_block = GCR,
665    .name = "GCR",
666    .num_counters = 2,
667 
668    .select0 = gfx10_GCR_select0,
669    .select1 = gfx10_GCR_select1,
670    .counter0_lo = R_035480_GCR_PERFCOUNTER0_LO,
671 
672    .num_spm_counters = 1,
673    .num_spm_wires = 2,
674    .spm_block_select = 0x4,
675 };
676 
677 /* gfx10_GE */
678 static unsigned gfx10_GE_select0[] = {
679    R_036200_GE_PERFCOUNTER0_SELECT,
680    R_036208_GE_PERFCOUNTER1_SELECT,
681    R_036210_GE_PERFCOUNTER2_SELECT,
682    R_036218_GE_PERFCOUNTER3_SELECT,
683    R_036220_GE_PERFCOUNTER4_SELECT,
684    R_036228_GE_PERFCOUNTER5_SELECT,
685    R_036230_GE_PERFCOUNTER6_SELECT,
686    R_036238_GE_PERFCOUNTER7_SELECT,
687    R_036240_GE_PERFCOUNTER8_SELECT,
688    R_036248_GE_PERFCOUNTER9_SELECT,
689    R_036250_GE_PERFCOUNTER10_SELECT,
690    R_036258_GE_PERFCOUNTER11_SELECT,
691 };
692 static unsigned gfx10_GE_select1[] = {
693    R_036204_GE_PERFCOUNTER0_SELECT1,
694    R_03620C_GE_PERFCOUNTER1_SELECT1,
695    R_036214_GE_PERFCOUNTER2_SELECT1,
696    R_03621C_GE_PERFCOUNTER3_SELECT1,
697 };
698 static struct ac_pc_block_base gfx10_GE = {
699    .gpu_block = GE,
700    .name = "GE",
701    .num_counters = 12,
702 
703    .select0 = gfx10_GE_select0,
704    .select1 = gfx10_GE_select1,
705    .counter0_lo = R_034200_GE_PERFCOUNTER0_LO,
706 
707    .num_spm_counters = 4,
708    .num_spm_wires = 8,
709    .spm_block_select = 0x6,
710 };
711 
712 /* gfx10_GL1A */
713 static unsigned gfx10_GL1A_select0[] = {
714    R_037700_GL1A_PERFCOUNTER0_SELECT,
715    R_037708_GL1A_PERFCOUNTER1_SELECT,
716    R_03770C_GL1A_PERFCOUNTER2_SELECT,
717    R_037710_GL1A_PERFCOUNTER3_SELECT,
718 };
719 static unsigned gfx10_GL1A_select1[] = {
720    R_037704_GL1A_PERFCOUNTER0_SELECT1,
721 };
722 static struct ac_pc_block_base gfx10_GL1A = {
723    .gpu_block = GL1A,
724    .name = "GL1A",
725    .num_counters = 4,
726    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,
727 
728    .select0 = gfx10_GL1A_select0,
729    .select1 = gfx10_GL1A_select1,
730    .counter0_lo = R_035700_GL1A_PERFCOUNTER0_LO,
731 
732    .num_spm_counters = 1,
733    .num_spm_wires = 2,
734    .spm_block_select = 0xa,
735 };
736 
737 /* gfx10_GL1C */
738 static unsigned gfx10_GL1C_select0[] = {
739    R_036E80_GL1C_PERFCOUNTER0_SELECT,
740    R_036E88_GL1C_PERFCOUNTER1_SELECT,
741    R_036E8C_GL1C_PERFCOUNTER2_SELECT,
742    R_036E90_GL1C_PERFCOUNTER3_SELECT,
743 };
744 static unsigned gfx10_GL1C_select1[] = {
745    R_036E84_GL1C_PERFCOUNTER0_SELECT1,
746 };
747 static struct ac_pc_block_base gfx10_GL1C = {
748    .gpu_block = GL1C,
749    .name = "GL1C",
750    .num_counters = 4,
751    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,
752 
753    .select0 = gfx10_GL1C_select0,
754    .select1 = gfx10_GL1C_select1,
755    .counter0_lo = R_034E80_GL1C_PERFCOUNTER0_LO,
756 
757    .num_spm_counters = 1,
758    .num_spm_wires = 2,
759    .spm_block_select = 0xc
760 };
761 
762 /* gfx10_GL2A */
763 static unsigned gfx10_GL2A_select0[] = {
764    R_036E40_GL2A_PERFCOUNTER0_SELECT,
765    R_036E48_GL2A_PERFCOUNTER1_SELECT,
766    R_036E50_GL2A_PERFCOUNTER2_SELECT,
767    R_036E54_GL2A_PERFCOUNTER3_SELECT,
768 };
769 static unsigned gfx10_GL2A_select1[] = {
770    R_036E44_GL2A_PERFCOUNTER0_SELECT1,
771    R_036E4C_GL2A_PERFCOUNTER1_SELECT1,
772 };
773 static struct ac_pc_block_base gfx10_GL2A = {
774    .gpu_block = GL2A,
775    .name = "GL2A",
776    .num_counters = 4,
777 
778    .select0 = gfx10_GL2A_select0,
779    .select1 = gfx10_GL2A_select1,
780    .counter0_lo = R_034E40_GL2A_PERFCOUNTER0_LO,
781 
782    .num_spm_counters = 2,
783    .num_spm_wires = 4,
784    .spm_block_select = 0x7,
785 };
786 
787 /* gfx10_GL2C */
788 static unsigned gfx10_GL2C_select0[] = {
789    R_036E00_GL2C_PERFCOUNTER0_SELECT,
790    R_036E08_GL2C_PERFCOUNTER1_SELECT,
791    R_036E10_GL2C_PERFCOUNTER2_SELECT,
792    R_036E14_GL2C_PERFCOUNTER3_SELECT,
793 };
794 static unsigned gfx10_GL2C_select1[] = {
795    R_036E04_GL2C_PERFCOUNTER0_SELECT1,
796    R_036E0C_GL2C_PERFCOUNTER1_SELECT1,
797 };
798 static struct ac_pc_block_base gfx10_GL2C = {
799    .gpu_block = GL2C,
800    .name = "GL2C",
801    .num_counters = 4,
802 
803    .select0 = gfx10_GL2C_select0,
804    .select1 = gfx10_GL2C_select1,
805    .counter0_lo = R_034E00_GL2C_PERFCOUNTER0_LO,
806 
807    .num_spm_counters = 2,
808    .num_spm_wires = 4,
809    .spm_block_select = 0x8,
810 };
811 
812 /* gfx10_PA_PH */
813 static unsigned gfx10_PA_PH_select0[] = {
814    R_037600_PA_PH_PERFCOUNTER0_SELECT,
815    R_037608_PA_PH_PERFCOUNTER1_SELECT,
816    R_03760C_PA_PH_PERFCOUNTER2_SELECT,
817    R_037610_PA_PH_PERFCOUNTER3_SELECT,
818    R_037614_PA_PH_PERFCOUNTER4_SELECT,
819    R_037618_PA_PH_PERFCOUNTER5_SELECT,
820    R_03761C_PA_PH_PERFCOUNTER6_SELECT,
821    R_037620_PA_PH_PERFCOUNTER7_SELECT,
822 };
823 static unsigned gfx10_PA_PH_select1[] = {
824    R_037604_PA_PH_PERFCOUNTER0_SELECT1,
825    R_037640_PA_PH_PERFCOUNTER1_SELECT1,
826    R_037644_PA_PH_PERFCOUNTER2_SELECT1,
827    R_037648_PA_PH_PERFCOUNTER3_SELECT1,
828 };
829 static struct ac_pc_block_base gfx10_PA_PH = {
830    .gpu_block = PA_PH,
831    .name = "PA_PH",
832    .num_counters = 8,
833    .flags = AC_PC_BLOCK_SE,
834 
835    .select0 = gfx10_PA_PH_select0,
836    .select1 = gfx10_PA_PH_select1,
837    .counter0_lo = R_035600_PA_PH_PERFCOUNTER0_LO,
838 
839    .num_spm_counters = 4,
840    .num_spm_wires = 8,
841    .spm_block_select = 0x5,
842 };
843 
844 /* gfx10_PA_SU */
845 static unsigned gfx10_PA_SU_select0[] = {
846    R_036400_PA_SU_PERFCOUNTER0_SELECT,
847    R_036408_PA_SU_PERFCOUNTER1_SELECT,
848    R_036410_PA_SU_PERFCOUNTER2_SELECT,
849    R_036418_PA_SU_PERFCOUNTER3_SELECT,
850 };
851 static unsigned gfx10_PA_SU_select1[] = {
852    R_036404_PA_SU_PERFCOUNTER0_SELECT1,
853    R_03640C_PA_SU_PERFCOUNTER1_SELECT1,
854    R_036414_PA_SU_PERFCOUNTER2_SELECT1,
855    R_03641C_PA_SU_PERFCOUNTER3_SELECT1,
856 };
857 static struct ac_pc_block_base gfx10_PA_SU = {
858    .gpu_block = PA_SU,
859    .name = "PA_SU",
860    .num_counters = 4,
861    .flags = AC_PC_BLOCK_SE,
862 
863    .select0 = gfx10_PA_SU_select0,
864    .select1 = gfx10_PA_SU_select1,
865    .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,
866 
867    .num_spm_counters = 4,
868    .num_spm_wires = 8,
869    .spm_block_select = 0x2,
870 };
871 
872 /* gfx10_RLC */
873 static unsigned gfx10_RLC_select0[] = {
874    R_037304_RLC_PERFCOUNTER0_SELECT,
875    R_037308_RLC_PERFCOUNTER1_SELECT,
876 };
877 static struct ac_pc_block_base gfx10_RLC = {
878    .gpu_block = RLC,
879    .name = "RLC",
880    .num_counters = 2,
881 
882    .select0 = gfx10_RLC_select0,
883    .counter0_lo = R_035200_RLC_PERFCOUNTER0_LO,
884    .num_spm_counters = 0,
885 };
886 
887 /* gfx10_RMI */
888 static unsigned gfx10_RMI_select0[] = {
889    R_037400_RMI_PERFCOUNTER0_SELECT,
890    R_037408_RMI_PERFCOUNTER1_SELECT,
891    R_03740C_RMI_PERFCOUNTER2_SELECT,
892    R_037414_RMI_PERFCOUNTER3_SELECT,
893 };
894 static unsigned gfx10_RMI_select1[] = {
895    R_037404_RMI_PERFCOUNTER0_SELECT1,
896    R_037410_RMI_PERFCOUNTER2_SELECT1,
897 };
898 static struct ac_pc_block_base gfx10_RMI = {
899    .gpu_block = RMI,
900    .name = "RMI",
901    .num_counters = 4,
902    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,
903 
904    .select0 = gfx10_RMI_select0,
905    .select1 = gfx10_RMI_select1,
906    .counter0_lo = R_035300_RMI_PERFCOUNTER0_LO,
907 
908    .num_spm_counters = 2,
909    .num_spm_wires = 2,
910    .spm_block_select = 0xb,
911 };
912 
913 /* gfx10_SQ */
914 static struct ac_pc_block_base gfx10_SQ = {
915    .gpu_block = SQ,
916    .name = "SQ",
917    .num_counters = 16,
918    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,
919 
920    .select0 = cik_SQ_select0,
921    .select_or = S_036700_SQC_BANK_MASK(15),
922    .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,
923 
924    .num_spm_wires = 16,
925    .spm_block_select = 0x9,
926 };
927 
928 /* gfx10_TCP */
929 static struct ac_pc_block_base gfx10_TCP = {
930    .gpu_block = TCP,
931    .name = "TCP",
932    .num_counters = 4,
933    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,
934 
935    .select0 = cik_TCP_select0,
936    .select1 = cik_TCP_select1,
937    .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,
938 
939    .num_spm_counters = 2,
940    .num_spm_wires = 4,
941    .spm_block_select = 0x7,
942 };
943 
944 /* gfx10_UTCL1 */
945 static unsigned gfx10_UTCL1_select0[] = {
946    R_03758C_UTCL1_PERFCOUNTER0_SELECT,
947    R_037590_UTCL1_PERFCOUNTER1_SELECT,
948 };
949 static struct ac_pc_block_base gfx10_UTCL1 = {
950    .gpu_block = UTCL1,
951    .name = "UTCL1",
952    .num_counters = 2,
953    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,
954 
955    .select0 = gfx10_UTCL1_select0,
956    .counter0_lo = R_035470_UTCL1_PERFCOUNTER0_LO,
957    .num_spm_counters = 0,
958 };
959 
/* Both the number of instances and selectors vary between chips of the same
961  * class. We only differentiate by class here and simply expose the maximum
962  * number over all chips in a class.
963  *
964  * Unfortunately, GPUPerfStudio uses the order of performance counter groups
965  * blindly once it believes it has identified the hardware, so the order of
966  * blocks here matters.
967  */
968 static struct ac_pc_block_gfxdescr groups_CIK[] = {
969    {&cik_CB, 226},     {&cik_CPF, 17},    {&cik_DB, 257},  {&cik_GRBM, 34},   {&cik_GRBMSE, 15},
970    {&cik_PA_SU, 153},  {&cik_PA_SC, 395}, {&cik_SPI, 186}, {&cik_SQ, 252},    {&cik_SX, 32},
971    {&cik_TA, 111},     {&cik_TCA, 39, 2}, {&cik_TCC, 160}, {&cik_TD, 55},     {&cik_TCP, 154},
972    {&cik_GDS, 121},    {&cik_VGT, 140},   {&cik_IA, 22},   {&cik_MC, 22},     {&cik_SRBM, 19},
973    {&cik_WD, 22},      {&cik_CPG, 46},    {&cik_CPC, 22},
974 
975 };
976 
977 static struct ac_pc_block_gfxdescr groups_VI[] = {
978    {&cik_CB, 405},     {&cik_CPF, 19},    {&cik_DB, 257},  {&cik_GRBM, 34},   {&cik_GRBMSE, 15},
979    {&cik_PA_SU, 154},  {&cik_PA_SC, 397}, {&cik_SPI, 197}, {&cik_SQ, 273},    {&cik_SX, 34},
980    {&cik_TA, 119},     {&cik_TCA, 35, 2}, {&cik_TCC, 192}, {&cik_TD, 55},     {&cik_TCP, 180},
981    {&cik_GDS, 121},    {&cik_VGT, 147},   {&cik_IA, 24},   {&cik_MC, 22},     {&cik_SRBM, 27},
982    {&cik_WD, 37},      {&cik_CPG, 48},    {&cik_CPC, 24},
983 
984 };
985 
986 static struct ac_pc_block_gfxdescr groups_gfx9[] = {
987    {&cik_CB, 438},     {&cik_CPF, 32},    {&cik_DB, 328},  {&cik_GRBM, 38},   {&cik_GRBMSE, 16},
988    {&cik_PA_SU, 292},  {&cik_PA_SC, 491}, {&cik_SPI, 196}, {&cik_SQ, 374},    {&cik_SX, 208},
989    {&cik_TA, 119},     {&cik_TCA, 35, 2}, {&cik_TCC, 256}, {&cik_TD, 57},     {&cik_TCP, 85},
990    {&cik_GDS, 121},    {&cik_VGT, 148},   {&cik_IA, 32},   {&cik_WD, 58},     {&cik_CPG, 59},
991    {&cik_CPC, 35},
992 };
993 
994 static struct ac_pc_block_gfxdescr groups_gfx10[] = {
995    {&cik_CB, 461},
996    {&gfx10_CHA, 45},
997    {&gfx10_CHCG, 35},
998    {&gfx10_CHC, 35},
999    {&cik_CPC, 47},
1000    {&cik_CPF, 40},
1001    {&cik_CPG, 82},
1002    {&gfx10_DB, 370},
1003    {&gfx10_GCR, 94},
1004    {&cik_GDS, 123},
1005    {&gfx10_GE, 315},
1006    {&gfx10_GL1A, 36},
1007    {&gfx10_GL1C, 64},
1008    {&gfx10_GL2A, 91},
1009    {&gfx10_GL2C, 235},
1010    {&cik_GRBM, 47},
1011    {&cik_GRBMSE, 19},
1012    {&gfx10_PA_PH, 960},
1013    {&cik_PA_SC, 552},
1014    {&gfx10_PA_SU, 266},
1015    {&gfx10_RLC, 7},
1016    {&gfx10_RMI, 258},
1017    {&cik_SPI, 329},
1018    {&gfx10_SQ, 509},
1019    {&cik_SX, 225},
1020    {&cik_TA, 226},
1021    {&gfx10_TCP, 77},
1022    {&cik_TD, 61},
1023    {&gfx10_UTCL1, 15},
1024 };
1025 
ac_lookup_counter(const struct ac_perfcounters * pc,unsigned index,unsigned * base_gid,unsigned * sub_index)1026 struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
1027                                       unsigned index, unsigned *base_gid,
1028                                       unsigned *sub_index)
1029 {
1030    struct ac_pc_block *block = pc->blocks;
1031    unsigned bid;
1032 
1033    *base_gid = 0;
1034    for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
1035       unsigned total = block->num_groups * block->b->selectors;
1036 
1037       if (index < total) {
1038          *sub_index = index;
1039          return block;
1040       }
1041 
1042       index -= total;
1043       *base_gid += block->num_groups;
1044    }
1045 
1046    return NULL;
1047 }
1048 
ac_lookup_group(const struct ac_perfcounters * pc,unsigned * index)1049 struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
1050                                     unsigned *index)
1051 {
1052    unsigned bid;
1053    struct ac_pc_block *block = pc->blocks;
1054 
1055    for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
1056       if (*index < block->num_groups)
1057          return block;
1058       *index -= block->num_groups;
1059    }
1060 
1061    return NULL;
1062 }
1063 
ac_init_block_names(const struct radeon_info * info,const struct ac_perfcounters * pc,struct ac_pc_block * block)1064 bool ac_init_block_names(const struct radeon_info *info,
1065                          const struct ac_perfcounters *pc,
1066                          struct ac_pc_block *block)
1067 {
1068    bool per_instance_groups = ac_pc_block_has_per_instance_groups(pc, block);
1069    bool per_se_groups = ac_pc_block_has_per_se_groups(pc, block);
1070    unsigned i, j, k;
1071    unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
1072    unsigned namelen;
1073    char *groupname;
1074    char *p;
1075 
1076    if (per_instance_groups)
1077       groups_instance = block->num_instances;
1078    if (per_se_groups)
1079       groups_se = info->max_se;
1080    if (block->b->b->flags & AC_PC_BLOCK_SHADER)
1081       groups_shader = ARRAY_SIZE(ac_pc_shader_type_bits);
1082 
1083    namelen = strlen(block->b->b->name);
1084    block->group_name_stride = namelen + 1;
1085    if (block->b->b->flags & AC_PC_BLOCK_SHADER)
1086       block->group_name_stride += 3;
1087    if (per_se_groups) {
1088       assert(groups_se <= 10);
1089       block->group_name_stride += 1;
1090 
1091       if (per_instance_groups)
1092          block->group_name_stride += 1;
1093    }
1094    if (per_instance_groups) {
1095       assert(groups_instance <= 100);
1096       block->group_name_stride += 2;
1097    }
1098 
1099    block->group_names = MALLOC(block->num_groups * block->group_name_stride);
1100    if (!block->group_names)
1101       return false;
1102 
1103    groupname = block->group_names;
1104    for (i = 0; i < groups_shader; ++i) {
1105       const char *shader_suffix = ac_pc_shader_type_suffixes[i];
1106       unsigned shaderlen = strlen(shader_suffix);
1107       for (j = 0; j < groups_se; ++j) {
1108          for (k = 0; k < groups_instance; ++k) {
1109             strcpy(groupname, block->b->b->name);
1110             p = groupname + namelen;
1111 
1112             if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
1113                strcpy(p, shader_suffix);
1114                p += shaderlen;
1115             }
1116 
1117             if (per_se_groups) {
1118                p += sprintf(p, "%d", j);
1119                if (per_instance_groups)
1120                   *p++ = '_';
1121             }
1122 
1123             if (per_instance_groups)
1124                p += sprintf(p, "%d", k);
1125 
1126             groupname += block->group_name_stride;
1127          }
1128       }
1129    }
1130 
1131    assert(block->b->selectors <= 1000);
1132    block->selector_name_stride = block->group_name_stride + 4;
1133    block->selector_names =
1134       MALLOC(block->num_groups * block->b->selectors * block->selector_name_stride);
1135    if (!block->selector_names)
1136       return false;
1137 
1138    groupname = block->group_names;
1139    p = block->selector_names;
1140    for (i = 0; i < block->num_groups; ++i) {
1141       for (j = 0; j < block->b->selectors; ++j) {
1142          sprintf(p, "%s_%03d", groupname, j);
1143          p += block->selector_name_stride;
1144       }
1145       groupname += block->group_name_stride;
1146    }
1147 
1148    return true;
1149 }
1150 
ac_init_perfcounters(const struct radeon_info * info,bool separate_se,bool separate_instance,struct ac_perfcounters * pc)1151 bool ac_init_perfcounters(const struct radeon_info *info,
1152                           bool separate_se,
1153                           bool separate_instance,
1154                           struct ac_perfcounters *pc)
1155 {
1156    const struct ac_pc_block_gfxdescr *blocks;
1157    unsigned num_blocks;
1158 
1159    switch (info->chip_class) {
1160    case GFX7:
1161       blocks = groups_CIK;
1162       num_blocks = ARRAY_SIZE(groups_CIK);
1163       break;
1164    case GFX8:
1165       blocks = groups_VI;
1166       num_blocks = ARRAY_SIZE(groups_VI);
1167       break;
1168    case GFX9:
1169       blocks = groups_gfx9;
1170       num_blocks = ARRAY_SIZE(groups_gfx9);
1171       break;
1172    case GFX10:
1173    case GFX10_3:
1174       blocks = groups_gfx10;
1175       num_blocks = ARRAY_SIZE(groups_gfx10);
1176       break;
1177    case GFX6:
1178    default:
1179       return false; /* not implemented */
1180    }
1181 
1182    pc->separate_se = separate_se;
1183    pc->separate_instance = separate_instance;
1184 
1185    pc->blocks = CALLOC(num_blocks, sizeof(struct ac_pc_block));
1186    if (!pc->blocks)
1187       return false;
1188    pc->num_blocks = num_blocks;
1189 
1190    for (unsigned i = 0; i < num_blocks; i++) {
1191       struct ac_pc_block *block = &pc->blocks[i];
1192 
1193       block->b = &blocks[i];
1194       block->num_instances = MAX2(1, block->b->instances);
1195 
1196       if (!strcmp(block->b->b->name, "CB") ||
1197           !strcmp(block->b->b->name, "DB") ||
1198           !strcmp(block->b->b->name, "RMI"))
1199          block->num_instances = info->max_se;
1200       else if (!strcmp(block->b->b->name, "TCC"))
1201          block->num_instances = info->max_tcc_blocks;
1202       else if (!strcmp(block->b->b->name, "IA"))
1203          block->num_instances = MAX2(1, info->max_se / 2);
1204       else if (!strcmp(block->b->b->name, "TA") ||
1205                !strcmp(block->b->b->name, "TCP") ||
1206                !strcmp(block->b->b->name, "TD")) {
1207          block->num_instances = MAX2(1, info->max_good_cu_per_sa);
1208       }
1209 
1210       if (ac_pc_block_has_per_instance_groups(pc, block)) {
1211          block->num_groups = block->num_instances;
1212       } else {
1213          block->num_groups = 1;
1214       }
1215 
1216       if (ac_pc_block_has_per_se_groups(pc, block))
1217          block->num_groups *= info->max_se;
1218       if (block->b->b->flags & AC_PC_BLOCK_SHADER)
1219          block->num_groups *= ARRAY_SIZE(ac_pc_shader_type_bits);
1220 
1221       pc->num_groups += block->num_groups;
1222    }
1223 
1224    return true;
1225 }
1226 
ac_destroy_perfcounters(struct ac_perfcounters * pc)1227 void ac_destroy_perfcounters(struct ac_perfcounters *pc)
1228 {
1229    if (!pc)
1230       return;
1231 
1232    for (unsigned i = 0; i < pc->num_blocks; ++i) {
1233       FREE(pc->blocks[i].group_names);
1234       FREE(pc->blocks[i].selector_names);
1235    }
1236    FREE(pc->blocks);
1237 }
1238