/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
24 
25 #ifndef AC_PERFCOUNTER_H
26 #define AC_PERFCOUNTER_H
27 
28 #include <stdbool.h>
29 
30 #include "sid.h"
31 
32 #include "ac_gpu_info.h"
33 
/* Max counters per HW block */
#define AC_QUERY_MAX_COUNTERS 16

/* Single-bit flag occupying bit 31 of an unsigned value.
 * NOTE(review): its consumers are outside this header; presumably it is
 * OR'ed into a shader mask to request shader windowing -- confirm at the
 * call sites.
 */
#define AC_PC_SHADERS_WINDOWING (1u << 31)
38 
/* Bit flags describing a perfcounter block. They are tested against
 * ac_pc_block_base::flags, so every enumerator is a distinct single bit and
 * several may be combined.
 */
enum ac_pc_block_flags
{
   /* This block is part of the shader engine */
   AC_PC_BLOCK_SE = (1 << 0),

   /* Expose per-instance groups instead of summing all instances (within
    * an SE). */
   AC_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),

   /* Expose per-SE groups instead of summing instances across SEs. */
   AC_PC_BLOCK_SE_GROUPS = (1 << 2),

   /* Shader block */
   AC_PC_BLOCK_SHADER = (1 << 3),

   /* Non-shader block with perfcounters windowed by shaders. */
   AC_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};
57 
/* Identifiers of the GPU hardware blocks that can own performance counters.
 *
 * Every value is assigned explicitly, so entries must not be reordered or
 * renumbered. GE1 is an alias of GE. NUM_GPU_BLOCK is not a block: it takes
 * the value following the last entry (0x31) and serves as the count.
 */
enum ac_pc_gpu_block {
   CPF     = 0x0,
   IA      = 0x1,
   VGT     = 0x2,
   PA_SU   = 0x3,
   PA_SC   = 0x4,
   SPI     = 0x5,
   SQ      = 0x6,
   SX      = 0x7,
   TA      = 0x8,
   TD      = 0x9,
   TCP     = 0xA,
   TCC     = 0xB,
   TCA     = 0xC,
   DB      = 0xD,
   CB      = 0xE,
   GDS     = 0xF,
   SRBM    = 0x10,
   GRBM    = 0x11,
   GRBMSE  = 0x12,
   RLC     = 0x13,
   DMA     = 0x14,
   MC      = 0x15,
   CPG     = 0x16,
   CPC     = 0x17,
   WD      = 0x18,
   TCS     = 0x19,
   ATC     = 0x1A,
   ATCL2   = 0x1B,
   MCVML2  = 0x1C,
   EA      = 0x1D,
   RPB     = 0x1E,
   RMI     = 0x1F,
   UMCCH   = 0x20,
   GE      = 0x21,
   GE1     = GE, /* alias: same value as GE */
   GL1A    = 0x22,
   GL1C    = 0x23,
   GL1CG   = 0x24,
   GL2A    = 0x25,
   GL2C    = 0x26,
   CHA     = 0x27,
   CHC     = 0x28,
   CHCG    = 0x29,
   GUS     = 0x2A,
   GCR     = 0x2B,
   PA_PH   = 0x2C,
   UTCL1   = 0x2D,
   GEDIST  = 0x2E,
   GESE    = 0x2F,
   DF      = 0x30,
   NUM_GPU_BLOCK, /* one past the last block id */
};
111 
112 struct ac_pc_block_base {
113    enum ac_pc_gpu_block gpu_block;
114    const char *name;
115    unsigned num_counters;
116    unsigned flags;
117 
118    unsigned select_or;
119    unsigned *select0;
120    unsigned counter0_lo;
121    unsigned *counters;
122 
123    /* SPM */
124    unsigned num_spm_counters;
125    unsigned num_spm_wires;
126    unsigned *select1;
127    unsigned spm_block_select;
128 };
129 
/* Descriptor that pairs a block's base description with two counts.
 *
 * NOTE(review): presumably one descriptor exists per GPU generation, with
 * `selectors` being the number of selectable events and `instances` the
 * number of block instances -- confirm against the descriptor tables.
 */
struct ac_pc_block_gfxdescr {
   struct ac_pc_block_base *b; /* base description of the block */
   unsigned selectors;
   unsigned instances;
};
135 
/* Runtime state of one perfcounter block.
 *
 * NOTE(review): group_names / selector_names look like packed name buffers
 * addressed with the matching *_stride; presumably ac_init_block_names()
 * fills them -- confirm in the implementation.
 */
struct ac_pc_block {
   const struct ac_pc_block_gfxdescr *b; /* static descriptor of this block */
   /* Instance count; a value > 1 is what allows per-instance groups when
    * ac_perfcounters::separate_instance is set. */
   unsigned num_instances;

   unsigned num_groups;
   char *group_names;
   unsigned group_name_stride;

   char *selector_names;
   unsigned selector_name_stride;
};
147 
/* Top-level perfcounter state: the array of blocks plus two policy switches. */
struct ac_perfcounters {
   unsigned num_groups;
   unsigned num_blocks;
   struct ac_pc_block *blocks;

   /* When set, blocks flagged AC_PC_BLOCK_SE expose per-SE groups. */
   bool separate_se;
   /* When set, blocks with more than one instance expose per-instance
    * groups. */
   bool separate_instance;
};
156 
/* Name suffixes, one per shader type. Index 0 is the empty suffix.
 *
 * The order is chosen to be compatible with GPUPerfStudio's hardcoding of
 * performance counter group IDs, so entries must not be reordered.
 */
static const char *const ac_pc_shader_type_suffixes[] = {"",    "_ES", "_GS", "_VS",
                                                         "_PS", "_LS", "_HS", "_CS"};
162 
163 static const unsigned ac_pc_shader_type_bits[] = {
164    0x7f,
165    S_036780_ES_EN(1),
166    S_036780_GS_EN(1),
167    S_036780_VS_EN(1),
168    S_036780_PS_EN(1),
169    S_036780_LS_EN(1),
170    S_036780_HS_EN(1),
171    S_036780_CS_EN(1),
172 };
173 
174 static inline bool
ac_pc_block_has_per_se_groups(const struct ac_perfcounters * pc,const struct ac_pc_block * block)175 ac_pc_block_has_per_se_groups(const struct ac_perfcounters *pc,
176                               const struct ac_pc_block *block)
177 {
178    return block->b->b->flags & AC_PC_BLOCK_SE_GROUPS ||
179           (block->b->b->flags & AC_PC_BLOCK_SE && pc->separate_se);
180 }
181 
182 static inline bool
ac_pc_block_has_per_instance_groups(const struct ac_perfcounters * pc,const struct ac_pc_block * block)183 ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc,
184                                     const struct ac_pc_block *block)
185 {
186    return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS ||
187           (block->num_instances > 1 && pc->separate_instance);
188 }
189 
/* Find the block that owns counter `index`.
 * NOTE(review): the implementation is not in this header; `base_gid` and
 * `sub_index` are non-const pointers and presumably receive results, and the
 * not-found return value (likely NULL) should be confirmed there.
 */
struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
                                      unsigned index, unsigned *base_gid,
                                      unsigned *sub_index);
/* Find the block that owns the group identified by `*index`.
 * NOTE(review): `index` is a non-const pointer, so it is presumably updated
 * in place (e.g. to a block-relative value) -- confirm in the implementation.
 */
struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
                                    unsigned *index);
195 
/* Return the block of `pc` matching `gpu_block`.
 * NOTE(review): behavior when no such block exists is not visible here
 * (presumably NULL) -- confirm in the implementation.
 */
struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc,
                                    enum ac_pc_gpu_block gpu_block);
198 
/* Initialize the name data of `block`.
 * NOTE(review): presumably allocates and fills block->group_names and
 * block->selector_names and returns false on failure -- confirm in the
 * implementation.
 */
bool ac_init_block_names(const struct radeon_info *info,
                         const struct ac_perfcounters *pc,
                         struct ac_pc_block *block);
202 
/* Set up `pc` for the GPU described by `info`.
 * NOTE(review): `separate_se` / `separate_instance` presumably end up in the
 * identically named fields of `pc`, and the return value presumably reports
 * success -- confirm in the implementation.
 */
bool ac_init_perfcounters(const struct radeon_info *info,
                          bool separate_se,
                          bool separate_instance,
                          struct ac_perfcounters *pc);

/* Tear down `pc`.
 * NOTE(review): presumably releases what ac_init_perfcounters() set up --
 * confirm ownership rules in the implementation.
 */
void ac_destroy_perfcounters(struct ac_perfcounters *pc);
208 
209 #endif
210