1 /*
2  * Copyright © 2014 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * Expose V3D HW perf counters.
26  *
27  * We also have code to fake support for occlusion queries.
28  * Since we expose support for GL 2.0, we have to expose occlusion queries,
29  * but the spec allows you to expose 0 query counter bits, so we just return 0
30  * as the result of all our queries.
31  */
32 #include "vc4_context.h"
33 
/**
 * Driver-side representation of a query object.
 *
 * Objects of this type are handed to the state tracker cast to
 * struct pipe_query and cast back on entry to the query hooks.
 */
struct vc4_query {
        /* Number of HW counters in the batch.  Only set when hwperfmon is
         * non-NULL; left at zero otherwise.
         */
        unsigned num_queries;

        /* HW perfmon state backing the query, or NULL when the query does
         * not use HW counters (its result is then always reported as 0).
         */
        struct vc4_hwperfmon *hwperfmon;
};
39 
/**
 * Names of the V3D HW perf counters.
 *
 * The position of a name in this table is the counter's event number:
 * entry i is advertised as query type PIPE_QUERY_DRIVER_SPECIFIC + i, and
 * that offset is what ends up in the perfmon event list.  Do not reorder.
 *
 * The strings are handed out verbatim as query names, so their spelling is
 * deliberately left untouched.  Both the strings and the table itself are
 * read-only.
 */
static const char *const v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discared-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-clk-cycles-vertex-coord-shading",
        "QPU-total-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-processed",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "L2C-total-cache-hit",
        "L2C-total-cache-miss",
};
72 
vc4_get_driver_query_group_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_group_info * info)73 int vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
74                                     unsigned index,
75                                     struct pipe_driver_query_group_info *info)
76 {
77         struct vc4_screen *screen = vc4_screen(pscreen);
78 
79         if (!screen->has_perfmon_ioctl)
80                 return 0;
81 
82         if (!info)
83                 return 1;
84 
85         if (index > 0)
86                 return 0;
87 
88         info->name = "V3D counters";
89         info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS;
90         info->num_queries = ARRAY_SIZE(v3d_counter_names);
91         return 1;
92 }
93 
vc4_get_driver_query_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)94 int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
95                               struct pipe_driver_query_info *info)
96 {
97         struct vc4_screen *screen = vc4_screen(pscreen);
98 
99         if (!screen->has_perfmon_ioctl)
100                 return 0;
101 
102         if (!info)
103                 return ARRAY_SIZE(v3d_counter_names);
104 
105         if (index >= ARRAY_SIZE(v3d_counter_names))
106                 return 0;
107 
108         info->group_id = 0;
109         info->name = v3d_counter_names[index];
110         info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
111         info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
112         info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
113         info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
114         return 1;
115 }
116 
117 static struct pipe_query *
vc4_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)118 vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
119                        unsigned *query_types)
120 {
121         struct vc4_query *query = calloc(1, sizeof(*query));
122         struct vc4_hwperfmon *hwperfmon;
123         unsigned i, nhwqueries = 0;
124 
125         if (!query)
126                 return NULL;
127 
128         for (i = 0; i < num_queries; i++) {
129                 if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC)
130                         nhwqueries++;
131         }
132 
133         /* We can't mix HW and non-HW queries. */
134         if (nhwqueries && nhwqueries != num_queries)
135                 goto err_free_query;
136 
137         if (!nhwqueries)
138                 return (struct pipe_query *)query;
139 
140         hwperfmon = calloc(1, sizeof(*hwperfmon));
141         if (!hwperfmon)
142                 goto err_free_query;
143 
144         for (i = 0; i < num_queries; i++)
145                 hwperfmon->events[i] = query_types[i] -
146                                        PIPE_QUERY_DRIVER_SPECIFIC;
147 
148         query->hwperfmon = hwperfmon;
149         query->num_queries = num_queries;
150 
151         /* Note that struct pipe_query isn't actually defined anywhere. */
152         return (struct pipe_query *)query;
153 
154 err_free_query:
155         free(query);
156 
157         return NULL;
158 }
159 
/**
 * Creates a single query by building a batch of one.
 *
 * @param ctx         context the query is created for
 * @param query_type  type of the query
 * @param index       unused
 *
 * @return the new query, or NULL on failure.
 */
static struct pipe_query *
vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
        unsigned types[1] = { query_type };

        return vc4_create_batch_query(ctx, 1, types);
}
165 
166 static void
vc4_destroy_query(struct pipe_context * pctx,struct pipe_query * pquery)167 vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
168 {
169         struct vc4_context *ctx = vc4_context(pctx);
170         struct vc4_query *query = (struct vc4_query *)pquery;
171 
172         if (query->hwperfmon && query->hwperfmon->id) {
173                 if (query->hwperfmon->id) {
174                         struct drm_vc4_perfmon_destroy req = { };
175 
176                         req.id = query->hwperfmon->id;
177                         vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY,
178                                   &req);
179                 }
180 
181                 free(query->hwperfmon);
182         }
183 
184         free(query);
185 }
186 
187 static bool
vc4_begin_query(struct pipe_context * pctx,struct pipe_query * pquery)188 vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
189 {
190         struct vc4_query *query = (struct vc4_query *)pquery;
191         struct vc4_context *ctx = vc4_context(pctx);
192         struct drm_vc4_perfmon_create req = { };
193         unsigned i;
194         int ret;
195 
196         if (!query->hwperfmon)
197                 return true;
198 
199         /* Only one perfmon can be activated per context. */
200         if (ctx->perfmon)
201                 return false;
202 
203         /* Reset the counters by destroying the previously allocated perfmon */
204         if (query->hwperfmon->id) {
205                 struct drm_vc4_perfmon_destroy destroyreq = { };
206 
207                 destroyreq.id = query->hwperfmon->id;
208                 vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq);
209         }
210 
211         for (i = 0; i < query->num_queries; i++)
212                 req.events[i] = query->hwperfmon->events[i];
213 
214         req.ncounters = query->num_queries;
215         ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req);
216         if (ret)
217                 return false;
218 
219         query->hwperfmon->id = req.id;
220 
221         /* Make sure all pendings jobs are flushed before activating the
222          * perfmon.
223          */
224         vc4_flush(pctx);
225         ctx->perfmon = query->hwperfmon;
226         return true;
227 }
228 
229 static bool
vc4_end_query(struct pipe_context * pctx,struct pipe_query * pquery)230 vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
231 {
232         struct vc4_query *query = (struct vc4_query *)pquery;
233         struct vc4_context *ctx = vc4_context(pctx);
234 
235         if (!query->hwperfmon)
236                 return true;
237 
238         if (ctx->perfmon != query->hwperfmon)
239                 return false;
240 
241         /* Make sure all pendings jobs are flushed before deactivating the
242          * perfmon.
243          */
244         vc4_flush(pctx);
245         ctx->perfmon = NULL;
246         return true;
247 }
248 
249 static bool
vc4_get_query_result(struct pipe_context * pctx,struct pipe_query * pquery,bool wait,union pipe_query_result * vresult)250 vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
251                      bool wait, union pipe_query_result *vresult)
252 {
253         struct vc4_context *ctx = vc4_context(pctx);
254         struct vc4_query *query = (struct vc4_query *)pquery;
255         struct drm_vc4_perfmon_get_values req;
256         unsigned i;
257         int ret;
258 
259         if (!query->hwperfmon) {
260                 vresult->u64 = 0;
261                 return true;
262         }
263 
264         if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno,
265                             wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon"))
266                 return false;
267 
268         req.id = query->hwperfmon->id;
269         req.values_ptr = (uintptr_t)query->hwperfmon->counters;
270         ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req);
271         if (ret)
272                 return false;
273 
274         for (i = 0; i < query->num_queries; i++)
275                 vresult->batch[i].u64 = query->hwperfmon->counters[i];
276 
277         return true;
278 }
279 
/**
 * Hook for pausing/resuming queries; intentionally does nothing.
 *
 * @param pctx    context (unused)
 * @param enable  requested state (unused)
 */
static void
vc4_set_active_query_state(struct pipe_context *pctx, bool enable)
{
        (void)pctx;
        (void)enable;
}
284 
285 void
vc4_query_init(struct pipe_context * pctx)286 vc4_query_init(struct pipe_context *pctx)
287 {
288         pctx->create_query = vc4_create_query;
289         pctx->create_batch_query = vc4_create_batch_query;
290         pctx->destroy_query = vc4_destroy_query;
291         pctx->begin_query = vc4_begin_query;
292         pctx->end_query = vc4_end_query;
293         pctx->get_query_result = vc4_get_query_result;
294         pctx->set_active_query_state = vc4_set_active_query_state;
295 }
296