1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "crocus_monitor.h"
24
25 #include <xf86drm.h>
26
27 #include "crocus_screen.h"
28 #include "crocus_context.h"
29
30 #include "perf/intel_perf.h"
31 #include "perf/intel_perf_query.h"
32 #include "perf/intel_perf_regs.h"
33
/* Per-monitor state for one AMD_performance_monitor object: the subset of
 * counters the application selected (all from one group) and the buffer the
 * raw query results are copied into.  Allocated/freed with malloc-family
 * functions in crocus_create/destroy_monitor_object().
 */
struct crocus_monitor_object {
   int num_active_counters;   /* number of entries in active_counters */
   int *active_counters;      /* counter indices within the group (calloc'ed) */

   size_t result_size;        /* size in bytes of result_buffer */
   unsigned char *result_buffer; /* raw perf query data (calloc'ed) */

   struct intel_perf_query_object *query; /* underlying intel perf query */
};
43
44 int
crocus_get_monitor_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)45 crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
46 struct pipe_driver_query_info *info)
47 {
48 const struct crocus_screen *screen = (struct crocus_screen *)pscreen;
49 assert(screen->monitor_cfg);
50 if (!screen->monitor_cfg)
51 return 0;
52
53 const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
54
55 if (!info) {
56 /* return the number of metrics */
57 return monitor_cfg->num_counters;
58 }
59
60 const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
61 const int group = monitor_cfg->counters[index].group;
62 const int counter_index = monitor_cfg->counters[index].counter;
63 struct intel_perf_query_counter *counter =
64 &perf_cfg->queries[group].counters[counter_index];
65
66 info->group_id = group;
67 info->name = counter->name;
68 info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
69
70 if (counter->type == INTEL_PERF_COUNTER_TYPE_THROUGHPUT)
71 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
72 else
73 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
74 switch (counter->data_type) {
75 case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
76 case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
77 info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
78 info->max_value.u32 = 0;
79 break;
80 case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
81 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
82 info->max_value.u64 = 0;
83 break;
84 case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
85 case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
86 info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
87 info->max_value.u64 = -1;
88 break;
89 default:
90 assert(false);
91 break;
92 }
93
94 /* indicates that this is an OA query, not a pipeline statistics query */
95 info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
96 return 1;
97 }
98
/* Function-pointer typedefs used only to cast the crocus driver entry points
 * into the generic struct intel_perf_config vtbl slots, which traffic in
 * void * instead of concrete crocus types.
 */
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
                                                  uint32_t );
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
111
/* Thin adapter so the intel_perf vtbl (void * based) can allocate BOs
 * through the crocus buffer manager.
 */
static void *
crocus_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
   return crocus_bo_alloc(bufmgr, name, size);
}
117
#if 0
/* Disabled: full pipeline flush bracketing OA metric collection.  Kept as a
 * reference for wiring up perf_cfg->vtbl.emit_mi_flush if it is ever needed.
 */
static void
crocus_monitor_emit_mi_flush(struct crocus_context *ice)
{
   const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                     PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                     PIPE_CONTROL_DATA_CACHE_FLUSH |
                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                     PIPE_CONTROL_VF_CACHE_INVALIDATE |
                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                     PIPE_CONTROL_CS_STALL;
   crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],
                                  "OA metrics", flags);
}
#endif
134
135 static void
crocus_monitor_emit_mi_report_perf_count(void * c,void * bo,uint32_t offset_in_bytes,uint32_t report_id)136 crocus_monitor_emit_mi_report_perf_count(void *c,
137 void *bo,
138 uint32_t offset_in_bytes,
139 uint32_t report_id)
140 {
141 struct crocus_context *ice = c;
142 struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
143 struct crocus_screen *screen = batch->screen;
144 screen->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);
145 }
146
147 static void
crocus_monitor_batchbuffer_flush(void * c,const char * file,int line)148 crocus_monitor_batchbuffer_flush(void *c, const char *file, int line)
149 {
150 struct crocus_context *ice = c;
151 _crocus_batch_flush(&ice->batches[CROCUS_BATCH_RENDER], __FILE__, __LINE__);
152 }
153
#if 0
/* Disabled helpers for frequency-stat capture and 64-bit register stores.
 * Kept as references for the capture_frequency_stat_register and
 * store_register_mem64 vtbl slots; note they use the older ice->vtbl path.
 */
static void
crocus_monitor_capture_frequency_stat_register(void *ctx,
                                               void *bo,
                                               uint32_t bo_offset)
{
   struct crocus_context *ice = ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);
}

static void
crocus_monitor_store_register_mem64(void *ctx, void *bo,
                                    uint32_t reg, uint32_t offset)
{
   struct crocus_context *ice = ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
}
#endif
174
175 static bool
crocus_monitor_init_metrics(struct crocus_screen * screen)176 crocus_monitor_init_metrics(struct crocus_screen *screen)
177 {
178 struct crocus_monitor_config *monitor_cfg =
179 rzalloc(screen, struct crocus_monitor_config);
180 struct intel_perf_config *perf_cfg = NULL;
181 if (unlikely(!monitor_cfg))
182 goto allocation_error;
183 perf_cfg = intel_perf_new(monitor_cfg);
184 if (unlikely(!perf_cfg))
185 goto allocation_error;
186
187 monitor_cfg->perf_cfg = perf_cfg;
188
189 perf_cfg->vtbl.bo_alloc = crocus_oa_bo_alloc;
190 perf_cfg->vtbl.bo_unreference = (bo_unreference_t)crocus_bo_unreference;
191 perf_cfg->vtbl.bo_map = (bo_map_t)crocus_bo_map;
192 perf_cfg->vtbl.bo_unmap = (bo_unmap_t)crocus_bo_unmap;
193
194 perf_cfg->vtbl.emit_mi_report_perf_count =
195 (emit_mi_report_t)crocus_monitor_emit_mi_report_perf_count;
196 perf_cfg->vtbl.batchbuffer_flush = crocus_monitor_batchbuffer_flush;
197 perf_cfg->vtbl.batch_references = (batch_references_t)crocus_batch_references;
198 perf_cfg->vtbl.bo_wait_rendering =
199 (bo_wait_rendering_t)crocus_bo_wait_rendering;
200 perf_cfg->vtbl.bo_busy = (bo_busy_t)crocus_bo_busy;
201
202 intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, false, false);
203 screen->monitor_cfg = monitor_cfg;
204
205 /* a gallium "group" is equivalent to a gen "query"
206 * a gallium "query" is equivalent to a gen "query_counter"
207 *
208 * Each gen_query supports a specific number of query_counters. To
209 * allocate the array of crocus_monitor_counter, we need an upper bound
210 * (ignoring duplicate query_counters).
211 */
212 int gen_query_counters_count = 0;
213 for (int gen_query_id = 0;
214 gen_query_id < perf_cfg->n_queries;
215 ++gen_query_id) {
216 gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
217 }
218
219 monitor_cfg->counters = rzalloc_size(monitor_cfg,
220 sizeof(struct crocus_monitor_counter) *
221 gen_query_counters_count);
222 if (unlikely(!monitor_cfg->counters))
223 goto allocation_error;
224
225 int crocus_monitor_id = 0;
226 for (int group = 0; group < perf_cfg->n_queries; ++group) {
227 for (int counter = 0;
228 counter < perf_cfg->queries[group].n_counters;
229 ++counter) {
230 /* Check previously identified metrics to filter out duplicates. The
231 * user is not helped by having the same metric available in several
232 * groups. (n^2 algorithm).
233 */
234 bool duplicate = false;
235 for (int existing_group = 0;
236 existing_group < group && !duplicate;
237 ++existing_group) {
238 for (int existing_counter = 0;
239 existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
240 ++existing_counter) {
241 const char *current_name =
242 perf_cfg->queries[group].counters[counter].name;
243 const char *existing_name =
244 perf_cfg->queries[existing_group].counters[existing_counter].name;
245 if (strcmp(current_name, existing_name) == 0) {
246 duplicate = true;
247 }
248 }
249 }
250 if (duplicate)
251 continue;
252 monitor_cfg->counters[crocus_monitor_id].group = group;
253 monitor_cfg->counters[crocus_monitor_id].counter = counter;
254 ++crocus_monitor_id;
255 }
256 }
257 monitor_cfg->num_counters = crocus_monitor_id;
258 return monitor_cfg->num_counters;
259
260 allocation_error:
261 if (monitor_cfg)
262 free(monitor_cfg->counters);
263 free(perf_cfg);
264 free(monitor_cfg);
265 return false;
266 }
267
268 int
crocus_get_monitor_group_info(struct pipe_screen * pscreen,unsigned group_index,struct pipe_driver_query_group_info * info)269 crocus_get_monitor_group_info(struct pipe_screen *pscreen,
270 unsigned group_index,
271 struct pipe_driver_query_group_info *info)
272 {
273 struct crocus_screen *screen = (struct crocus_screen *)pscreen;
274 if (!screen->monitor_cfg) {
275 if (!crocus_monitor_init_metrics(screen))
276 return 0;
277 }
278
279 const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
280 const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
281
282 if (!info) {
283 /* return the count that can be queried */
284 return perf_cfg->n_queries;
285 }
286
287 if (group_index >= perf_cfg->n_queries) {
288 /* out of range */
289 return 0;
290 }
291
292 struct intel_perf_query_info *query = &perf_cfg->queries[group_index];
293
294 info->name = query->name;
295 info->max_active_queries = query->n_counters;
296 info->num_queries = query->n_counters;
297
298 return 1;
299 }
300
301 static void
crocus_init_monitor_ctx(struct crocus_context * ice)302 crocus_init_monitor_ctx(struct crocus_context *ice)
303 {
304 struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
305 struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
306
307 ice->perf_ctx = intel_perf_new_context(ice);
308 if (unlikely(!ice->perf_ctx))
309 return;
310
311 struct intel_perf_context *perf_ctx = ice->perf_ctx;
312 struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
313 intel_perf_init_context(perf_ctx,
314 perf_cfg,
315 ice,
316 ice,
317 screen->bufmgr,
318 &screen->devinfo,
319 ice->batches[CROCUS_BATCH_RENDER].hw_ctx_id,
320 screen->fd);
321 }
322
323 /* entry point for GenPerfMonitorsAMD */
324 struct crocus_monitor_object *
crocus_create_monitor_object(struct crocus_context * ice,unsigned num_queries,unsigned * query_types)325 crocus_create_monitor_object(struct crocus_context *ice,
326 unsigned num_queries,
327 unsigned *query_types)
328 {
329 struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
330 struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
331 struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
332 struct intel_perf_query_object *query_obj = NULL;
333
334 /* initialize perf context if this has not already been done. This
335 * function is the first entry point that carries the gl context.
336 */
337 if (ice->perf_ctx == NULL) {
338 crocus_init_monitor_ctx(ice);
339 }
340 struct intel_perf_context *perf_ctx = ice->perf_ctx;
341
342 assert(num_queries > 0);
343 int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
344 assert(query_index <= monitor_cfg->num_counters);
345 const int group = monitor_cfg->counters[query_index].group;
346
347 struct crocus_monitor_object *monitor =
348 calloc(1, sizeof(struct crocus_monitor_object));
349 if (unlikely(!monitor))
350 goto allocation_failure;
351
352 monitor->num_active_counters = num_queries;
353 monitor->active_counters = calloc(num_queries, sizeof(int));
354 if (unlikely(!monitor->active_counters))
355 goto allocation_failure;
356
357 for (int i = 0; i < num_queries; ++i) {
358 unsigned current_query = query_types[i];
359 unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;
360
361 /* all queries must be in the same group */
362 assert(current_query_index <= monitor_cfg->num_counters);
363 assert(monitor_cfg->counters[current_query_index].group == group);
364 monitor->active_counters[i] =
365 monitor_cfg->counters[current_query_index].counter;
366 }
367
368 /* create the intel_perf_query */
369 query_obj = intel_perf_new_query(perf_ctx, group);
370 if (unlikely(!query_obj))
371 goto allocation_failure;
372
373 monitor->query = query_obj;
374 monitor->result_size = perf_cfg->queries[group].data_size;
375 monitor->result_buffer = calloc(1, monitor->result_size);
376 if (unlikely(!monitor->result_buffer))
377 goto allocation_failure;
378
379 return monitor;
380
381 allocation_failure:
382 if (monitor) {
383 free(monitor->active_counters);
384 free(monitor->result_buffer);
385 }
386 free(query_obj);
387 free(monitor);
388 return NULL;
389 }
390
391 void
crocus_destroy_monitor_object(struct pipe_context * ctx,struct crocus_monitor_object * monitor)392 crocus_destroy_monitor_object(struct pipe_context *ctx,
393 struct crocus_monitor_object *monitor)
394 {
395 struct crocus_context *ice = (struct crocus_context *)ctx;
396
397 intel_perf_delete_query(ice->perf_ctx, monitor->query);
398 free(monitor->result_buffer);
399 monitor->result_buffer = NULL;
400 free(monitor->active_counters);
401 monitor->active_counters = NULL;
402 free(monitor);
403 }
404
405 bool
crocus_begin_monitor(struct pipe_context * ctx,struct crocus_monitor_object * monitor)406 crocus_begin_monitor(struct pipe_context *ctx,
407 struct crocus_monitor_object *monitor)
408 {
409 struct crocus_context *ice = (void *) ctx;
410 struct intel_perf_context *perf_ctx = ice->perf_ctx;
411
412 return intel_perf_begin_query(perf_ctx, monitor->query);
413 }
414
415 bool
crocus_end_monitor(struct pipe_context * ctx,struct crocus_monitor_object * monitor)416 crocus_end_monitor(struct pipe_context *ctx,
417 struct crocus_monitor_object *monitor)
418 {
419 struct crocus_context *ice = (void *) ctx;
420 struct intel_perf_context *perf_ctx = ice->perf_ctx;
421
422 intel_perf_end_query(perf_ctx, monitor->query);
423 return true;
424 }
425
426 bool
crocus_get_monitor_result(struct pipe_context * ctx,struct crocus_monitor_object * monitor,bool wait,union pipe_numeric_type_union * result)427 crocus_get_monitor_result(struct pipe_context *ctx,
428 struct crocus_monitor_object *monitor,
429 bool wait,
430 union pipe_numeric_type_union *result)
431 {
432 struct crocus_context *ice = (void *) ctx;
433 struct intel_perf_context *perf_ctx = ice->perf_ctx;
434 struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
435
436 bool monitor_ready =
437 intel_perf_is_query_ready(perf_ctx, monitor->query, batch);
438
439 if (!monitor_ready) {
440 if (!wait)
441 return false;
442 intel_perf_wait_query(perf_ctx, monitor->query, batch);
443 }
444
445 assert(intel_perf_is_query_ready(perf_ctx, monitor->query, batch));
446
447 unsigned bytes_written;
448 intel_perf_get_query_data(perf_ctx, monitor->query, batch,
449 monitor->result_size,
450 (unsigned*) monitor->result_buffer,
451 &bytes_written);
452 if (bytes_written != monitor->result_size)
453 return false;
454
455 /* copy metrics into the batch result */
456 for (int i = 0; i < monitor->num_active_counters; ++i) {
457 int current_counter = monitor->active_counters[i];
458 const struct intel_perf_query_info *info =
459 intel_perf_query_info(monitor->query);
460 const struct intel_perf_query_counter *counter =
461 &info->counters[current_counter];
462 assert(intel_perf_query_counter_get_size(counter));
463 switch (counter->data_type) {
464 case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
465 result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);
466 break;
467 case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
468 result[i].f = *(float*)(monitor->result_buffer + counter->offset);
469 break;
470 case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
471 case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
472 result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);
473 break;
474 case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: {
475 double v = *(double*)(monitor->result_buffer + counter->offset);
476 result[i].f = v;
477 break;
478 }
479 default:
480 unreachable("unexpected counter data type");
481 }
482 }
483 return true;
484 }
485