1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "intel_perf.h"
25 #include "intel_perf_mdapi.h"
26 #include "intel_perf_private.h"
27 #include "intel_perf_regs.h"
28 
29 #include "dev/intel_device_info.h"
30 
31 #include <drm-uapi/i915_drm.h>
32 
33 
34 int
intel_perf_query_result_write_mdapi(void * data,uint32_t data_size,const struct intel_device_info * devinfo,const struct intel_perf_query_info * query,const struct intel_perf_query_result * result)35 intel_perf_query_result_write_mdapi(void *data, uint32_t data_size,
36                                     const struct intel_device_info *devinfo,
37                                     const struct intel_perf_query_info *query,
38                                     const struct intel_perf_query_result *result)
39 {
40    switch (devinfo->ver) {
41    case 7: {
42       struct gfx7_mdapi_metrics *mdapi_data = (struct gfx7_mdapi_metrics *) data;
43 
44       if (data_size < sizeof(*mdapi_data))
45          return 0;
46 
47       assert(devinfo->is_haswell);
48 
49       for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
50          mdapi_data->ACounters[i] = result->accumulator[1 + i];
51 
52       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
53          mdapi_data->NOACounters[i] =
54             result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
55       }
56 
57       mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
58       mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
59 
60       mdapi_data->ReportsCount = result->reports_accumulated;
61       mdapi_data->TotalTime =
62          intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
63       mdapi_data->CoreFrequency = result->gt_frequency[1];
64       mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
65       mdapi_data->SplitOccured = result->query_disjoint;
66       return sizeof(*mdapi_data);
67    }
68    case 8: {
69       struct gfx8_mdapi_metrics *mdapi_data = (struct gfx8_mdapi_metrics *) data;
70 
71       if (data_size < sizeof(*mdapi_data))
72          return 0;
73 
74       for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
75          mdapi_data->OaCntr[i] = result->accumulator[2 + i];
76       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
77          mdapi_data->NoaCntr[i] =
78             result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
79       }
80 
81       mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
82       mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
83 
84       mdapi_data->ReportId = result->hw_id;
85       mdapi_data->ReportsCount = result->reports_accumulated;
86       mdapi_data->TotalTime =
87          intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
88       mdapi_data->BeginTimestamp =
89          intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
90       mdapi_data->GPUTicks = result->accumulator[1];
91       mdapi_data->CoreFrequency = result->gt_frequency[1];
92       mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
93       mdapi_data->SliceFrequency =
94          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
95       mdapi_data->UnsliceFrequency =
96          (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
97       mdapi_data->SplitOccured = result->query_disjoint;
98       return sizeof(*mdapi_data);
99    }
100    case 9:
101    case 11:
102    case 12:{
103       struct gfx9_mdapi_metrics *mdapi_data = (struct gfx9_mdapi_metrics *) data;
104 
105       if (data_size < sizeof(*mdapi_data))
106          return 0;
107 
108       for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
109          mdapi_data->OaCntr[i] = result->accumulator[2 + i];
110       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
111          mdapi_data->NoaCntr[i] =
112             result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
113       }
114 
115       mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
116       mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
117 
118       mdapi_data->ReportId = result->hw_id;
119       mdapi_data->ReportsCount = result->reports_accumulated;
120       mdapi_data->TotalTime =
121          intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
122       mdapi_data->BeginTimestamp =
123          intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
124       mdapi_data->GPUTicks = result->accumulator[1];
125       mdapi_data->CoreFrequency = result->gt_frequency[1];
126       mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
127       mdapi_data->SliceFrequency =
128          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
129       mdapi_data->UnsliceFrequency =
130          (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
131       mdapi_data->SplitOccured = result->query_disjoint;
132       return sizeof(*mdapi_data);
133    }
134    default:
135       unreachable("unexpected gen");
136    }
137 }
138 
139 void
intel_perf_register_mdapi_statistic_query(struct intel_perf_config * perf_cfg,const struct intel_device_info * devinfo)140 intel_perf_register_mdapi_statistic_query(struct intel_perf_config *perf_cfg,
141                                           const struct intel_device_info *devinfo)
142 {
143    if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
144       return;
145 
146    struct intel_perf_query_info *query =
147       intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
148 
149    query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE;
150    query->name = "Intel_Raw_Pipeline_Statistics_Query";
151 
152    /* The order has to match mdapi_pipeline_metrics. */
153    intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
154                                      "N vertices submitted");
155    intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
156                                      "N primitives submitted");
157    intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
158                                      "N vertex shader invocations");
159    intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
160                                      "N geometry shader invocations");
161    intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
162                                      "N geometry shader primitives emitted");
163    intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
164                                      "N primitives entering clipping");
165    intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
166                                      "N primitives leaving clipping");
167    if (devinfo->is_haswell || devinfo->ver == 8) {
168       intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
169                                   "N fragment shader invocations",
170                                   "N fragment shader invocations");
171    } else {
172       intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
173                                         "N fragment shader invocations");
174    }
175    intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
176                                      "N TCS shader invocations");
177    intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
178                                      "N TES shader invocations");
179    if (devinfo->ver >= 7) {
180       intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
181                                         "N compute shader invocations");
182    }
183 
184    if (devinfo->ver >= 10) {
185       /* Reuse existing CS invocation register until we can expose this new
186        * one.
187        */
188       intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
189                                         "Reserved1");
190    }
191 
192    query->data_size = sizeof(uint64_t) * query->n_counters;
193 }
194 
195 static void
fill_mdapi_perf_query_counter(struct intel_perf_query_info * query,const char * name,uint32_t data_offset,uint32_t data_size,enum intel_perf_counter_data_type data_type)196 fill_mdapi_perf_query_counter(struct intel_perf_query_info *query,
197                               const char *name,
198                               uint32_t data_offset,
199                               uint32_t data_size,
200                               enum intel_perf_counter_data_type data_type)
201 {
202    struct intel_perf_query_counter *counter = &query->counters[query->n_counters];
203 
204    assert(query->n_counters <= query->max_counters);
205 
206    counter->name = name;
207    counter->desc = "Raw counter value";
208    counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
209    counter->data_type = data_type;
210    counter->offset = data_offset;
211 
212    query->n_counters++;
213 
214    assert(counter->offset + intel_perf_query_counter_get_size(counter) <= query->data_size);
215 }
216 
/* Register the scalar member `field_name` of the layout object `struct_name`
 * as a raw counter on `query`. The counter is named after the member, its
 * offset is the member's byte distance from the start of `struct_name`, and
 * `type_name` is pasted onto INTEL_PERF_COUNTER_DATA_TYPE_ to form the data
 * type. Only addresses and sizes of `struct_name` are used; its contents are
 * never read. Parameters are parenthesized so that any expression (e.g.
 * `*ptr`) can be passed.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
   fill_mdapi_perf_query_counter((query), #field_name,                  \
                                 (uint8_t *) &(struct_name).field_name - \
                                 (uint8_t *) &(struct_name),            \
                                 sizeof((struct_name).field_name),      \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type_name)

/* Same as MDAPI_QUERY_ADD_COUNTER, but for element `idx` of the array member
 * `field_name`. The counter name is the member name followed by the index,
 * formatted with ralloc_asprintf() using `ctx` as the allocation context.
 * `idx` appears more than once in the expansion, so it must not have side
 * effects.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter((query),                               \
                                 ralloc_asprintf((ctx), "%s%i", #field_name, (idx)), \
                                 (uint8_t *) &(struct_name).field_name[(idx)] - \
                                 (uint8_t *) &(struct_name),            \
                                 sizeof((struct_name).field_name[0]),   \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type_name)
230 
231 void
intel_perf_register_mdapi_oa_query(struct intel_perf_config * perf,const struct intel_device_info * devinfo)232 intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
233                                    const struct intel_device_info *devinfo)
234 {
235    struct intel_perf_query_info *query = NULL;
236 
237    /* MDAPI requires different structures for pretty much every generation
238     * (right now we have definitions for gen 7 to 12).
239     */
240    if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
241       return;
242 
243    switch (devinfo->ver) {
244    case 7: {
245       query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7);
246       query->oa_format = I915_OA_FORMAT_A45_B8_C8;
247 
248       struct gfx7_mdapi_metrics metric_data;
249       query->data_size = sizeof(metric_data);
250 
251       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
252       for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
253          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
254                                        metric_data, ACounters, i, UINT64);
255       }
256       for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
257          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
258                                        metric_data, NOACounters, i, UINT64);
259       }
260       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
261       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
262       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
263       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
264       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
265       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
266       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
267       break;
268    }
269    case 8: {
270       query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16);
271       query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
272 
273       struct gfx8_mdapi_metrics metric_data;
274       query->data_size = sizeof(metric_data);
275 
276       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
277       MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
278       for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
279          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
280                                        metric_data, OaCntr, i, UINT64);
281       }
282       for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
283          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
284                                        metric_data, NoaCntr, i, UINT64);
285       }
286       MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
287       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
288       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
289       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
290       MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
291       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
292       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
293       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
294       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
295       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
296       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
297       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
298       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
299       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
300       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
301       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
302       break;
303    }
304    case 9:
305    case 11:
306    case 12: {
307       query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
308       query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
309 
310       struct gfx9_mdapi_metrics metric_data;
311       query->data_size = sizeof(metric_data);
312 
313       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
314       MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
315       for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
316          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
317                                        metric_data, OaCntr, i, UINT64);
318       }
319       for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
320          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
321                                        metric_data, NoaCntr, i, UINT64);
322       }
323       MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
324       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
325       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
326       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
327       MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
328       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
329       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
330       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
331       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
332       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
333       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
334       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
335       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
336       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
337       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
338       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
339       for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
340          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
341                                        metric_data, UserCntr, i, UINT64);
342       }
343       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
344       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
345       break;
346    }
347    default:
348       unreachable("Unsupported gen");
349       break;
350    }
351 
352    query->kind = INTEL_PERF_QUERY_TYPE_RAW;
353    query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
354    query->guid = INTEL_PERF_QUERY_GUID_MDAPI;
355 
356    {
357       /* Accumulation buffer offsets copied from an actual query... */
358       const struct intel_perf_query_info *copy_query =
359          &perf->queries[0];
360 
361       query->gpu_time_offset = copy_query->gpu_time_offset;
362       query->gpu_clock_offset = copy_query->gpu_clock_offset;
363       query->a_offset = copy_query->a_offset;
364       query->b_offset = copy_query->b_offset;
365       query->c_offset = copy_query->c_offset;
366       query->perfcnt_offset = copy_query->perfcnt_offset;
367    }
368 }
369