1 /*
2  * Copyright © 2019 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "crocus_monitor.h"
24 
25 #include <xf86drm.h>
26 
27 #include "crocus_screen.h"
28 #include "crocus_context.h"
29 
30 #include "perf/intel_perf.h"
31 #include "perf/intel_perf_query.h"
32 #include "perf/intel_perf_regs.h"
33 
/* Driver-side state for one performance monitor: the selected counters, a
 * buffer for the raw query data, and the perf query that backs it.
 */
struct crocus_monitor_object {
   /* number of entries in active_counters */
   int num_active_counters;
   /* heap-allocated array of counter indices within the query's group */
   int *active_counters;

   /* size in bytes of result_buffer */
   size_t result_size;
   /* heap-allocated buffer the raw query data is read into */
   unsigned char *result_buffer;

   /* intel_perf query object associated with this monitor */
   struct intel_perf_query_object *query;
};
43 
44 int
crocus_get_monitor_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)45 crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
46                         struct pipe_driver_query_info *info)
47 {
48    const struct crocus_screen *screen = (struct crocus_screen *)pscreen;
49    assert(screen->monitor_cfg);
50    if (!screen->monitor_cfg)
51       return 0;
52 
53    const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
54 
55    if (!info) {
56       /* return the number of metrics */
57       return monitor_cfg->num_counters;
58    }
59 
60    const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
61    const int group = monitor_cfg->counters[index].group;
62    const int counter_index = monitor_cfg->counters[index].counter;
63    struct intel_perf_query_counter *counter =
64       &perf_cfg->queries[group].counters[counter_index];
65 
66    info->group_id = group;
67    info->name = counter->name;
68    info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
69 
70    if (counter->type == INTEL_PERF_COUNTER_TYPE_THROUGHPUT)
71       info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
72    else
73       info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
74    switch (counter->data_type) {
75    case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
76    case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
77       info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
78       info->max_value.u32 = 0;
79       break;
80    case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
81       info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
82       info->max_value.u64 = 0;
83       break;
84    case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
85    case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
86       info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
87       info->max_value.u64 = -1;
88       break;
89    default:
90       assert(false);
91       break;
92    }
93 
94    /* indicates that this is an OA query, not a pipeline statistics query */
95    info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
96    return 1;
97 }
98 
/* Function-pointer types with untyped (void *) object arguments.
 * crocus_monitor_init_metrics() uses several of them to cast crocus
 * functions when filling in perf_cfg->vtbl.
 *
 * emit_mi_flush_t, capture_frequency_stat_register_t and
 * store_register_mem64_t are not referenced by any enabled code in the
 * visible part of this file.
 */
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
                                                  uint32_t );
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
111 
/* Buffer-allocation hook installed as perf_cfg->vtbl.bo_alloc.  It takes
 * untyped arguments and hands them unchanged to crocus_bo_alloc().
 */
static void *
crocus_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
   void *bo = crocus_bo_alloc(bufmgr, name, size);

   return bo;
}
117 
/* Compiled out by the #if 0 guard.  If enabled, this helper would build a
 * mask of PIPE_CONTROL flush/invalidate bits plus a CS stall and pass it to
 * crocus_emit_pipe_control_flush() on the render batch with the reason
 * string "OA metrics".
 */
#if 0
static void
crocus_monitor_emit_mi_flush(struct crocus_context *ice)
{
   const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                     PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                     PIPE_CONTROL_DATA_CACHE_FLUSH |
                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                     PIPE_CONTROL_VF_CACHE_INVALIDATE |
                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                     PIPE_CONTROL_CS_STALL;
   crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],
                                  "OA metrics", flags);
}
#endif
134 
135 static void
crocus_monitor_emit_mi_report_perf_count(void * c,void * bo,uint32_t offset_in_bytes,uint32_t report_id)136 crocus_monitor_emit_mi_report_perf_count(void *c,
137                                          void *bo,
138                                          uint32_t offset_in_bytes,
139                                          uint32_t report_id)
140 {
141    struct crocus_context *ice = c;
142    struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
143    struct crocus_screen *screen = batch->screen;
144    screen->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);
145 }
146 
147 static void
crocus_monitor_batchbuffer_flush(void * c,const char * file,int line)148 crocus_monitor_batchbuffer_flush(void *c, const char *file, int line)
149 {
150    struct crocus_context *ice = c;
151    _crocus_batch_flush(&ice->batches[CROCUS_BATCH_RENDER], __FILE__, __LINE__);
152 }
153 
/* Compiled out by the #if 0 guard.  If enabled, these two helpers would
 * store a register value into a buffer object through the render batch:
 * a 32-bit store of GEN9_RPSTAT0, and a 64-bit store of an arbitrary
 * register.
 *
 * NOTE(review): both dispatch through ice->vtbl, whereas the enabled code in
 * this file dispatches through screen->vtbl -- confirm which table is
 * correct before re-enabling.
 */
#if 0
static void
crocus_monitor_capture_frequency_stat_register(void *ctx,
                                               void *bo,
                                               uint32_t bo_offset)
{
   struct crocus_context *ice = ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);
}

static void
crocus_monitor_store_register_mem64(void *ctx, void *bo,
                                    uint32_t reg, uint32_t offset)
{
   struct crocus_context *ice = ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
}
#endif
174 
175 static bool
crocus_monitor_init_metrics(struct crocus_screen * screen)176 crocus_monitor_init_metrics(struct crocus_screen *screen)
177 {
178    struct crocus_monitor_config *monitor_cfg =
179       rzalloc(screen, struct crocus_monitor_config);
180    struct intel_perf_config *perf_cfg = NULL;
181    if (unlikely(!monitor_cfg))
182       goto allocation_error;
183    perf_cfg = intel_perf_new(monitor_cfg);
184    if (unlikely(!perf_cfg))
185       goto allocation_error;
186 
187    monitor_cfg->perf_cfg = perf_cfg;
188 
189    perf_cfg->vtbl.bo_alloc = crocus_oa_bo_alloc;
190    perf_cfg->vtbl.bo_unreference = (bo_unreference_t)crocus_bo_unreference;
191    perf_cfg->vtbl.bo_map = (bo_map_t)crocus_bo_map;
192    perf_cfg->vtbl.bo_unmap = (bo_unmap_t)crocus_bo_unmap;
193 
194    perf_cfg->vtbl.emit_mi_report_perf_count =
195       (emit_mi_report_t)crocus_monitor_emit_mi_report_perf_count;
196    perf_cfg->vtbl.batchbuffer_flush = crocus_monitor_batchbuffer_flush;
197    perf_cfg->vtbl.batch_references = (batch_references_t)crocus_batch_references;
198    perf_cfg->vtbl.bo_wait_rendering =
199       (bo_wait_rendering_t)crocus_bo_wait_rendering;
200    perf_cfg->vtbl.bo_busy = (bo_busy_t)crocus_bo_busy;
201 
202    intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, false, false);
203    screen->monitor_cfg = monitor_cfg;
204 
205    /* a gallium "group" is equivalent to a gen "query"
206     * a gallium "query" is equivalent to a gen "query_counter"
207     *
208     * Each gen_query supports a specific number of query_counters.  To
209     * allocate the array of crocus_monitor_counter, we need an upper bound
210     * (ignoring duplicate query_counters).
211     */
212    int gen_query_counters_count = 0;
213    for (int gen_query_id = 0;
214         gen_query_id < perf_cfg->n_queries;
215         ++gen_query_id) {
216       gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
217    }
218 
219    monitor_cfg->counters = rzalloc_size(monitor_cfg,
220                                         sizeof(struct crocus_monitor_counter) *
221                                         gen_query_counters_count);
222    if (unlikely(!monitor_cfg->counters))
223       goto allocation_error;
224 
225    int crocus_monitor_id = 0;
226    for (int group = 0; group < perf_cfg->n_queries; ++group) {
227       for (int counter = 0;
228            counter < perf_cfg->queries[group].n_counters;
229            ++counter) {
230          /* Check previously identified metrics to filter out duplicates. The
231           * user is not helped by having the same metric available in several
232           * groups. (n^2 algorithm).
233           */
234          bool duplicate = false;
235          for (int existing_group = 0;
236               existing_group < group && !duplicate;
237               ++existing_group) {
238             for (int existing_counter = 0;
239                  existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
240                  ++existing_counter) {
241                const char *current_name =
242                   perf_cfg->queries[group].counters[counter].name;
243                const char *existing_name =
244                   perf_cfg->queries[existing_group].counters[existing_counter].name;
245                if (strcmp(current_name, existing_name) == 0) {
246                   duplicate = true;
247                }
248             }
249          }
250          if (duplicate)
251             continue;
252          monitor_cfg->counters[crocus_monitor_id].group = group;
253          monitor_cfg->counters[crocus_monitor_id].counter = counter;
254          ++crocus_monitor_id;
255       }
256    }
257    monitor_cfg->num_counters = crocus_monitor_id;
258    return monitor_cfg->num_counters;
259 
260 allocation_error:
261    if (monitor_cfg)
262       free(monitor_cfg->counters);
263    free(perf_cfg);
264    free(monitor_cfg);
265    return false;
266 }
267 
268 int
crocus_get_monitor_group_info(struct pipe_screen * pscreen,unsigned group_index,struct pipe_driver_query_group_info * info)269 crocus_get_monitor_group_info(struct pipe_screen *pscreen,
270                               unsigned group_index,
271                               struct pipe_driver_query_group_info *info)
272 {
273    struct crocus_screen *screen = (struct crocus_screen *)pscreen;
274    if (!screen->monitor_cfg) {
275       if (!crocus_monitor_init_metrics(screen))
276          return 0;
277    }
278 
279    const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
280    const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
281 
282    if (!info) {
283       /* return the count that can be queried */
284       return perf_cfg->n_queries;
285    }
286 
287    if (group_index >= perf_cfg->n_queries) {
288       /* out of range */
289       return 0;
290    }
291 
292    struct intel_perf_query_info *query = &perf_cfg->queries[group_index];
293 
294    info->name = query->name;
295    info->max_active_queries = query->n_counters;
296    info->num_queries = query->n_counters;
297 
298    return 1;
299 }
300 
301 static void
crocus_init_monitor_ctx(struct crocus_context * ice)302 crocus_init_monitor_ctx(struct crocus_context *ice)
303 {
304    struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
305    struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
306 
307    ice->perf_ctx = intel_perf_new_context(ice);
308    if (unlikely(!ice->perf_ctx))
309       return;
310 
311    struct intel_perf_context *perf_ctx = ice->perf_ctx;
312    struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
313    intel_perf_init_context(perf_ctx,
314                            perf_cfg,
315                            ice,
316                            ice,
317                            screen->bufmgr,
318                            &screen->devinfo,
319                            ice->batches[CROCUS_BATCH_RENDER].hw_ctx_id,
320                            screen->fd);
321 }
322 
323 /* entry point for GenPerfMonitorsAMD */
324 struct crocus_monitor_object *
crocus_create_monitor_object(struct crocus_context * ice,unsigned num_queries,unsigned * query_types)325 crocus_create_monitor_object(struct crocus_context *ice,
326                              unsigned num_queries,
327                              unsigned *query_types)
328 {
329    struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
330    struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
331    struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
332    struct intel_perf_query_object *query_obj = NULL;
333 
334    /* initialize perf context if this has not already been done.  This
335     * function is the first entry point that carries the gl context.
336     */
337    if (ice->perf_ctx == NULL) {
338       crocus_init_monitor_ctx(ice);
339    }
340    struct intel_perf_context *perf_ctx = ice->perf_ctx;
341 
342    assert(num_queries > 0);
343    int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
344    assert(query_index <= monitor_cfg->num_counters);
345    const int group = monitor_cfg->counters[query_index].group;
346 
347    struct crocus_monitor_object *monitor =
348       calloc(1, sizeof(struct crocus_monitor_object));
349    if (unlikely(!monitor))
350       goto allocation_failure;
351 
352    monitor->num_active_counters = num_queries;
353    monitor->active_counters = calloc(num_queries, sizeof(int));
354    if (unlikely(!monitor->active_counters))
355       goto allocation_failure;
356 
357    for (int i = 0; i < num_queries; ++i) {
358       unsigned current_query = query_types[i];
359       unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;
360 
361       /* all queries must be in the same group */
362       assert(current_query_index <= monitor_cfg->num_counters);
363       assert(monitor_cfg->counters[current_query_index].group == group);
364       monitor->active_counters[i] =
365          monitor_cfg->counters[current_query_index].counter;
366    }
367 
368    /* create the intel_perf_query */
369    query_obj = intel_perf_new_query(perf_ctx, group);
370    if (unlikely(!query_obj))
371       goto allocation_failure;
372 
373    monitor->query = query_obj;
374    monitor->result_size = perf_cfg->queries[group].data_size;
375    monitor->result_buffer = calloc(1, monitor->result_size);
376    if (unlikely(!monitor->result_buffer))
377       goto allocation_failure;
378 
379    return monitor;
380 
381 allocation_failure:
382    if (monitor) {
383       free(monitor->active_counters);
384       free(monitor->result_buffer);
385    }
386    free(query_obj);
387    free(monitor);
388    return NULL;
389 }
390 
391 void
crocus_destroy_monitor_object(struct pipe_context * ctx,struct crocus_monitor_object * monitor)392 crocus_destroy_monitor_object(struct pipe_context *ctx,
393                               struct crocus_monitor_object *monitor)
394 {
395    struct crocus_context *ice = (struct crocus_context *)ctx;
396 
397    intel_perf_delete_query(ice->perf_ctx, monitor->query);
398    free(monitor->result_buffer);
399    monitor->result_buffer = NULL;
400    free(monitor->active_counters);
401    monitor->active_counters = NULL;
402    free(monitor);
403 }
404 
405 bool
crocus_begin_monitor(struct pipe_context * ctx,struct crocus_monitor_object * monitor)406 crocus_begin_monitor(struct pipe_context *ctx,
407                      struct crocus_monitor_object *monitor)
408 {
409    struct crocus_context *ice = (void *) ctx;
410    struct intel_perf_context *perf_ctx = ice->perf_ctx;
411 
412    return intel_perf_begin_query(perf_ctx, monitor->query);
413 }
414 
415 bool
crocus_end_monitor(struct pipe_context * ctx,struct crocus_monitor_object * monitor)416 crocus_end_monitor(struct pipe_context *ctx,
417                    struct crocus_monitor_object *monitor)
418 {
419    struct crocus_context *ice = (void *) ctx;
420    struct intel_perf_context *perf_ctx = ice->perf_ctx;
421 
422    intel_perf_end_query(perf_ctx, monitor->query);
423    return true;
424 }
425 
426 bool
crocus_get_monitor_result(struct pipe_context * ctx,struct crocus_monitor_object * monitor,bool wait,union pipe_numeric_type_union * result)427 crocus_get_monitor_result(struct pipe_context *ctx,
428                           struct crocus_monitor_object *monitor,
429                           bool wait,
430                           union pipe_numeric_type_union *result)
431 {
432    struct crocus_context *ice = (void *) ctx;
433    struct intel_perf_context *perf_ctx = ice->perf_ctx;
434    struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
435 
436    bool monitor_ready =
437       intel_perf_is_query_ready(perf_ctx, monitor->query, batch);
438 
439    if (!monitor_ready) {
440       if (!wait)
441          return false;
442       intel_perf_wait_query(perf_ctx, monitor->query, batch);
443    }
444 
445    assert(intel_perf_is_query_ready(perf_ctx, monitor->query, batch));
446 
447    unsigned bytes_written;
448    intel_perf_get_query_data(perf_ctx, monitor->query, batch,
449                              monitor->result_size,
450                              (unsigned*) monitor->result_buffer,
451                              &bytes_written);
452    if (bytes_written != monitor->result_size)
453       return false;
454 
455    /* copy metrics into the batch result */
456    for (int i = 0; i < monitor->num_active_counters; ++i) {
457       int current_counter = monitor->active_counters[i];
458       const struct intel_perf_query_info *info =
459          intel_perf_query_info(monitor->query);
460       const struct intel_perf_query_counter *counter =
461          &info->counters[current_counter];
462       assert(intel_perf_query_counter_get_size(counter));
463       switch (counter->data_type) {
464       case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
465          result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);
466          break;
467       case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
468          result[i].f = *(float*)(monitor->result_buffer + counter->offset);
469          break;
470       case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
471       case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
472          result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);
473          break;
474       case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: {
475          double v = *(double*)(monitor->result_buffer + counter->offset);
476          result[i].f = v;
477          break;
478       }
479       default:
480          unreachable("unexpected counter data type");
481       }
482    }
483    return true;
484 }
485