1 /*
2  * Copyright © 2021 Raspberry Pi
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
/**
 * Gallium query object support for performance counters
 *
 * This file implements the V3D performance counter queries.
 */
29 
30 #include "v3d_query.h"
31 
/* Driver-side state for one batch performance-counter query. */
struct v3d_query_perfcnt
{
        /* Generic query header. The query callbacks in this file receive a
         * struct v3d_query pointer and cast it back to this struct.
         */
        struct v3d_query base;

        /* Number of counters in the batch; it bounds the loops over
         * perfmon->counters and perfmon->values.
         */
        unsigned num_queries;
        /* Perfmon state allocated together with the query and freed when
         * the query is destroyed.
         */
        struct v3d_perfmon_state *perfmon;
};
39 
/* Names of the performance counters exposed as driver-specific queries.
 *
 * The position in this array is the counter index: entry N is reported with
 * query type PIPE_QUERY_DRIVER_SPECIFIC + N, and N is the value stored in the
 * perfmon counter list. Do not reorder entries.
 */
static const char *v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        /* NOTE(review): "discared" looks like a typo for "discarded". The
         * name is visible at runtime, so it is kept as is; confirm before
         * renaming.
         */
        "PTB-primitives-discared-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-active-clk-cycles-vertex-coord-shading",
        "QPU-total-active-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-access",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "CLE-bin-thread-active-cycles",
        "CLE-render-thread-active-cycles",
        "L2T-total-cache-hit",
        "L2T-total-cache-miss",
        "cycle-count",
        "QPU-total-clk-cycles-waiting-vertex-coord-shading",
        "QPU-total-clk-cycles-waiting-fragment-shading",
        "PTB-primitives-binned",
        "AXI-writes-seen-watch-0",
        "AXI-reads-seen-watch-0",
        "AXI-writes-stalled-seen-watch-0",
        "AXI-reads-stalled-seen-watch-0",
        "AXI-write-bytes-seen-watch-0",
        "AXI-read-bytes-seen-watch-0",
        "AXI-writes-seen-watch-1",
        "AXI-reads-seen-watch-1",
        "AXI-writes-stalled-seen-watch-1",
        "AXI-reads-stalled-seen-watch-1",
        "AXI-write-bytes-seen-watch-1",
        "AXI-read-bytes-seen-watch-1",
        "TLB-partial-quads-written-to-color-buffer",
        "TMU-total-config-access",
        "L2T-no-id-stalled",
        "L2T-command-queue-stalled",
        "L2T-TMU-writes",
        "TMU-active-cycles",
        "TMU-stalled-cycles",
        "CLE-thread-active-cycles",
        "L2T-TMU-reads",
        "L2T-CLE-reads",
        "L2T-VCD-reads",
        "L2T-TMU-config-reads",
        "L2T-SLC0-reads",
        "L2T-SLC1-reads",
        "L2T-SLC2-reads",
        "L2T-TMU-write-miss",
        "L2T-TMU-read-miss",
        "L2T-CLE-read-miss",
        "L2T-VCD-read-miss",
        "L2T-TMU-config-read-miss",
        "L2T-SLC0-read-miss",
        "L2T-SLC1-read-miss",
        "L2T-SLC2-read-miss",
        "core-memory-writes",
        "L2T-memory-writes",
        "PTB-memory-writes",
        "TLB-memory-writes",
        "core-memory-reads",
        "L2T-memory-reads",
        "PTB-memory-reads",
        "PSE-memory-reads",
        "TLB-memory-reads",
        "GMP-memory-reads",
        "PTB-memory-words-writes",
        "TLB-memory-words-writes",
        "PSE-memory-words-reads",
        "TLB-memory-words-reads",
        "TMU-MRU-hits",
        "compute-active-cycles",
};
129 
130 static void
kperfmon_destroy(struct v3d_context * v3d,struct v3d_perfmon_state * perfmon)131 kperfmon_destroy(struct v3d_context *v3d, struct v3d_perfmon_state *perfmon)
132 {
133         struct drm_v3d_perfmon_destroy destroyreq;
134 
135         destroyreq.id = perfmon->kperfmon_id;
136         int ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_DESTROY, &destroyreq);
137         if (ret != 0)
138                 fprintf(stderr, "failed to destroy perfmon %d: %s\n",
139                         perfmon->kperfmon_id, strerror(errno));
140 }
141 
142 int
v3d_get_driver_query_group_info_perfcnt(struct v3d_screen * screen,unsigned index,struct pipe_driver_query_group_info * info)143 v3d_get_driver_query_group_info_perfcnt(struct v3d_screen *screen, unsigned index,
144                                         struct pipe_driver_query_group_info *info)
145 {
146         if (!screen->has_perfmon)
147                 return 0;
148 
149         if (!info)
150                 return 1;
151 
152         if (index > 0)
153                 return 0;
154 
155         info->name = "V3D counters";
156         info->max_active_queries = DRM_V3D_MAX_PERF_COUNTERS;
157         info->num_queries = ARRAY_SIZE(v3d_counter_names);
158 
159         return 1;
160 }
161 
162 int
v3d_get_driver_query_info_perfcnt(struct v3d_screen * screen,unsigned index,struct pipe_driver_query_info * info)163 v3d_get_driver_query_info_perfcnt(struct v3d_screen *screen, unsigned index,
164                                   struct pipe_driver_query_info *info)
165 {
166         if (!screen->has_perfmon)
167                 return 0;
168 
169         if (!info)
170                 return ARRAY_SIZE(v3d_counter_names);
171 
172         if (index >= ARRAY_SIZE(v3d_counter_names))
173                 return 0;
174 
175         info->group_id = 0;
176         info->name = v3d_counter_names[index];
177         info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
178         info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
179         info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
180         info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
181 
182         return 1;
183 }
184 
185 static void
v3d_destroy_query_perfcnt(struct v3d_context * v3d,struct v3d_query * query)186 v3d_destroy_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query)
187 {
188         struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
189 
190         assert(pquery->perfmon);
191 
192         if (v3d->active_perfmon == pquery->perfmon) {
193                 fprintf(stderr, "Query is active; end query before destroying\n");
194                 return;
195         }
196         if (pquery->perfmon->kperfmon_id)
197                 kperfmon_destroy(v3d, pquery->perfmon);
198 
199         v3d_fence_unreference(&pquery->perfmon->last_job_fence);
200         free(pquery->perfmon);
201         free(query);
202 }
203 
204 static bool
v3d_begin_query_perfcnt(struct v3d_context * v3d,struct v3d_query * query)205 v3d_begin_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query)
206 {
207         struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
208         struct drm_v3d_perfmon_create createreq = { 0 };
209         int i, ret;
210 
211         /* Only one perfmon can be activated per context */
212         if (v3d->active_perfmon) {
213                 fprintf(stderr,
214                         "Another query is already active; "
215                         "finish it before starting a new one\n");
216                 return false;
217         }
218 
219         assert(pquery->perfmon);
220 
221         /* Reset the counters by destroying the previously allocated perfmon */
222         if (pquery->perfmon->kperfmon_id)
223                 kperfmon_destroy(v3d, pquery->perfmon);
224 
225         for (i = 0; i < pquery->num_queries; i++)
226                 createreq.counters[i] = pquery->perfmon->counters[i];
227 
228         createreq.ncounters = pquery->num_queries;
229         ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_CREATE, &createreq);
230         if (ret != 0)
231                 return false;
232 
233         pquery->perfmon->kperfmon_id = createreq.id;
234         pquery->perfmon->job_submitted = false;
235         v3d_fence_unreference(&pquery->perfmon->last_job_fence);
236 
237         /* Ensure all pending jobs are flushed before activating the
238          * perfmon
239          */
240         v3d_flush((struct pipe_context *)v3d);
241         v3d->active_perfmon = pquery->perfmon;
242 
243         return true;
244 }
245 
246 static bool
v3d_end_query_perfcnt(struct v3d_context * v3d,struct v3d_query * query)247 v3d_end_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query)
248 {
249         struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
250 
251         assert(pquery->perfmon);
252 
253         if (v3d->active_perfmon != pquery->perfmon) {
254                 fprintf(stderr, "This query is not active\n");
255                 return false;
256         }
257 
258         /* Ensure all pending jobs are flushed before deactivating the
259          * perfmon
260          */
261         v3d_flush((struct pipe_context *)v3d);
262 
263         /* Get a copy of latest submitted job's fence to wait for its
264          * completion
265          */
266         if (v3d->active_perfmon->job_submitted)
267                 v3d->active_perfmon->last_job_fence = v3d_fence_create(v3d);
268 
269         v3d->active_perfmon = NULL;
270 
271         return true;
272 }
273 
274 static bool
v3d_get_query_result_perfcnt(struct v3d_context * v3d,struct v3d_query * query,bool wait,union pipe_query_result * vresult)275 v3d_get_query_result_perfcnt(struct v3d_context *v3d, struct v3d_query *query,
276                              bool wait, union pipe_query_result *vresult)
277 {
278         struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
279         struct drm_v3d_perfmon_get_values req = { 0 };
280         int i, ret;
281 
282         assert(pquery->perfmon);
283 
284         if (pquery->perfmon->job_submitted) {
285                 if (!v3d_fence_wait(v3d->screen,
286                                     pquery->perfmon->last_job_fence,
287                                     wait ? PIPE_TIMEOUT_INFINITE : 0))
288                         return false;
289 
290                 req.id = pquery->perfmon->kperfmon_id;
291                 req.values_ptr = (uintptr_t)pquery->perfmon->values;
292                 ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_GET_VALUES, &req);
293                 if (ret != 0) {
294                         fprintf(stderr, "Can't request perfmon counters values\n");
295                         return false;
296                 }
297         }
298 
299         for (i = 0; i < pquery->num_queries; i++)
300                 vresult->batch[i].u64 = pquery->perfmon->values[i];
301 
302         return true;
303 }
304 
305 static const struct v3d_query_funcs perfcnt_query_funcs = {
306         .destroy_query = v3d_destroy_query_perfcnt,
307         .begin_query = v3d_begin_query_perfcnt,
308         .end_query = v3d_end_query_perfcnt,
309         .get_query_result = v3d_get_query_result_perfcnt,
310 };
311 
312 struct pipe_query *
v3d_create_batch_query_perfcnt(struct v3d_context * v3d,unsigned num_queries,unsigned * query_types)313 v3d_create_batch_query_perfcnt(struct v3d_context *v3d, unsigned num_queries,
314                                unsigned *query_types)
315 {
316         struct v3d_query_perfcnt *pquery = NULL;
317         struct v3d_query *query;
318         struct v3d_perfmon_state *perfmon = NULL;
319         int i;
320 
321         /* Validate queries */
322         for (i = 0; i < num_queries; i++) {
323                 if (query_types[i] < PIPE_QUERY_DRIVER_SPECIFIC ||
324                     query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC +
325                     ARRAY_SIZE(v3d_counter_names)) {
326                         fprintf(stderr, "Invalid query type\n");
327                         return NULL;
328                 }
329         }
330 
331         pquery = calloc(1, sizeof(*pquery));
332         if (!pquery)
333                 return NULL;
334 
335         perfmon = calloc(1, sizeof(*perfmon));
336         if (!perfmon) {
337                 free(pquery);
338                 return NULL;
339         }
340 
341         for (i = 0; i < num_queries; i++)
342                 perfmon->counters[i] = query_types[i] - PIPE_QUERY_DRIVER_SPECIFIC;
343 
344         pquery->perfmon = perfmon;
345         pquery->num_queries = num_queries;
346 
347         query = &pquery->base;
348         query->funcs = &perfcnt_query_funcs;
349 
350         /* Note that struct pipe_query isn't actually defined anywhere. */
351          return (struct pipe_query *)query;
352 }
353