1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 
28 #include "anv_private.h"
29 #include "vk_util.h"
30 
31 #include "perf/intel_perf.h"
32 #include "perf/intel_perf_mdapi.h"
33 
34 #include "util/mesa-sha1.h"
35 
36 void
anv_physical_device_init_perf(struct anv_physical_device * device,int fd)37 anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
38 {
39    const struct intel_device_info *devinfo = &device->info;
40 
41    device->perf = NULL;
42 
43    /* We need self modifying batches. The i915 parser prevents it on
44     * Gfx7.5 :( maybe one day.
45     */
46    if (devinfo->ver < 8)
47       return;
48 
49    struct intel_perf_config *perf = intel_perf_new(NULL);
50 
51    intel_perf_init_metrics(perf, &device->info, fd,
52                            false /* pipeline statistics */,
53                            true /* register snapshots */);
54 
55    if (!perf->n_queries) {
56       if (perf->platform_supported) {
57          static bool warned_once = false;
58 
59          if (!warned_once) {
60             mesa_logw("Performance support disabled, "
61                       "consider sysctl dev.i915.perf_stream_paranoid=0\n");
62             warned_once = true;
63          }
64       }
65       goto err;
66    }
67 
68    /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
69     * perf revision 2.
70     */
71    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
72       if (!intel_perf_has_hold_preemption(perf))
73          goto err;
74    }
75 
76    device->perf = perf;
77 
78    /* Compute the number of commands we need to implement a performance
79     * query.
80     */
81    const struct intel_perf_query_field_layout *layout = &perf->query_layout;
82    device->n_perf_query_commands = 0;
83    for (uint32_t f = 0; f < layout->n_fields; f++) {
84       struct intel_perf_query_field *field = &layout->fields[f];
85 
86       switch (field->type) {
87       case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
88          device->n_perf_query_commands++;
89          break;
90       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
91       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
92       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
93       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
94          device->n_perf_query_commands += field->size / 4;
95          break;
96       }
97    }
98    device->n_perf_query_commands *= 2; /* Begin & End */
99    device->n_perf_query_commands += 1; /* availability */
100 
101    return;
102 
103  err:
104    ralloc_free(perf);
105 }
106 
107 void
anv_device_perf_init(struct anv_device * device)108 anv_device_perf_init(struct anv_device *device)
109 {
110    device->perf_fd = -1;
111 }
112 
113 static int
anv_device_perf_open(struct anv_device * device,uint64_t metric_id)114 anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
115 {
116    uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
117    struct drm_i915_perf_open_param param;
118    int p = 0, stream_fd;
119 
120    properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
121    properties[p++] = true;
122 
123    properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
124    properties[p++] = metric_id;
125 
126    properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
127    properties[p++] = device->info.ver >= 8 ?
128       I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
129       I915_OA_FORMAT_A45_B8_C8;
130 
131    properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
132    properties[p++] = 31; /* slowest sampling period */
133 
134    properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
135    properties[p++] = device->context_id;
136 
137    properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
138    properties[p++] = true;
139 
140    /* If global SSEU is available, pin it to the default. This will ensure on
141     * Gfx11 for instance we use the full EU array. Initially when perf was
142     * enabled we would use only half on Gfx11 because of functional
143     * requirements.
144     */
145    if (intel_perf_has_global_sseu(device->physical->perf)) {
146       properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
147       properties[p++] = (uintptr_t) &device->physical->perf->sseu;
148    }
149 
150    memset(&param, 0, sizeof(param));
151    param.flags = 0;
152    param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
153    param.properties_ptr = (uintptr_t)properties;
154    param.num_properties = p / 2;
155 
156    stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
157    return stream_fd;
158 }
159 
160 /* VK_INTEL_performance_query */
anv_InitializePerformanceApiINTEL(VkDevice _device,const VkInitializePerformanceApiInfoINTEL * pInitializeInfo)161 VkResult anv_InitializePerformanceApiINTEL(
162     VkDevice                                    _device,
163     const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
164 {
165    ANV_FROM_HANDLE(anv_device, device, _device);
166 
167    if (!device->physical->perf)
168       return VK_ERROR_EXTENSION_NOT_PRESENT;
169 
170    /* Not much to do here */
171    return VK_SUCCESS;
172 }
173 
anv_GetPerformanceParameterINTEL(VkDevice _device,VkPerformanceParameterTypeINTEL parameter,VkPerformanceValueINTEL * pValue)174 VkResult anv_GetPerformanceParameterINTEL(
175     VkDevice                                    _device,
176     VkPerformanceParameterTypeINTEL             parameter,
177     VkPerformanceValueINTEL*                    pValue)
178 {
179       ANV_FROM_HANDLE(anv_device, device, _device);
180 
181       if (!device->physical->perf)
182          return VK_ERROR_EXTENSION_NOT_PRESENT;
183 
184       VkResult result = VK_SUCCESS;
185       switch (parameter) {
186       case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
187          pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
188          pValue->data.valueBool = VK_TRUE;
189          break;
190 
191       case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
192          pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
193          pValue->data.value32 = 25;
194          break;
195 
196       default:
197          result = VK_ERROR_FEATURE_NOT_PRESENT;
198          break;
199       }
200 
201       return result;
202 }
203 
anv_CmdSetPerformanceMarkerINTEL(VkCommandBuffer commandBuffer,const VkPerformanceMarkerInfoINTEL * pMarkerInfo)204 VkResult anv_CmdSetPerformanceMarkerINTEL(
205     VkCommandBuffer                             commandBuffer,
206     const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
207 {
208    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
209 
210    cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
211 
212    return VK_SUCCESS;
213 }
214 
anv_AcquirePerformanceConfigurationINTEL(VkDevice _device,const VkPerformanceConfigurationAcquireInfoINTEL * pAcquireInfo,VkPerformanceConfigurationINTEL * pConfiguration)215 VkResult anv_AcquirePerformanceConfigurationINTEL(
216     VkDevice                                    _device,
217     const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
218     VkPerformanceConfigurationINTEL*            pConfiguration)
219 {
220    ANV_FROM_HANDLE(anv_device, device, _device);
221    struct anv_performance_configuration_intel *config;
222 
223    config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
224                             VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
225    if (!config)
226       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
227 
228    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
229       config->register_config =
230          intel_perf_load_configuration(device->physical->perf, device->fd,
231                                      INTEL_PERF_QUERY_GUID_MDAPI);
232       if (!config->register_config) {
233          vk_object_free(&device->vk, NULL, config);
234          return VK_INCOMPLETE;
235       }
236 
237       int ret =
238          intel_perf_store_configuration(device->physical->perf, device->fd,
239                                       config->register_config, NULL /* guid */);
240       if (ret < 0) {
241          ralloc_free(config->register_config);
242          vk_object_free(&device->vk, NULL, config);
243          return VK_INCOMPLETE;
244       }
245 
246       config->config_id = ret;
247    }
248 
249    *pConfiguration = anv_performance_configuration_intel_to_handle(config);
250 
251    return VK_SUCCESS;
252 }
253 
anv_ReleasePerformanceConfigurationINTEL(VkDevice _device,VkPerformanceConfigurationINTEL _configuration)254 VkResult anv_ReleasePerformanceConfigurationINTEL(
255     VkDevice                                    _device,
256     VkPerformanceConfigurationINTEL             _configuration)
257 {
258    ANV_FROM_HANDLE(anv_device, device, _device);
259    ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
260 
261    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
262       intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);
263 
264    ralloc_free(config->register_config);
265 
266    vk_object_free(&device->vk, NULL, config);
267 
268    return VK_SUCCESS;
269 }
270 
anv_QueueSetPerformanceConfigurationINTEL(VkQueue _queue,VkPerformanceConfigurationINTEL _configuration)271 VkResult anv_QueueSetPerformanceConfigurationINTEL(
272     VkQueue                                     _queue,
273     VkPerformanceConfigurationINTEL             _configuration)
274 {
275    ANV_FROM_HANDLE(anv_queue, queue, _queue);
276    ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
277    struct anv_device *device = queue->device;
278 
279    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
280       if (device->perf_fd < 0) {
281          device->perf_fd = anv_device_perf_open(device, config->config_id);
282          if (device->perf_fd < 0)
283             return VK_ERROR_INITIALIZATION_FAILED;
284       } else {
285          int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
286                                (void *)(uintptr_t) config->config_id);
287          if (ret < 0)
288             return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
289       }
290    }
291 
292    return VK_SUCCESS;
293 }
294 
/* Entry point for vkUninitializePerformanceApiINTEL.
 *
 * Closes the device's i915-perf stream if one is open and resets perf_fd
 * to -1; does nothing otherwise.
 */
void anv_UninitializePerformanceApiINTEL(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (device->perf_fd < 0)
      return; /* no stream was ever opened */

   close(device->perf_fd);
   device->perf_fd = -1;
}
305 
306 /* VK_KHR_performance_query */
307 static const VkPerformanceCounterUnitKHR
308 intel_perf_counter_unit_to_vk_unit[] = {
309    [INTEL_PERF_COUNTER_UNITS_BYTES]                                = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
310    [INTEL_PERF_COUNTER_UNITS_HZ]                                   = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
311    [INTEL_PERF_COUNTER_UNITS_NS]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
312    [INTEL_PERF_COUNTER_UNITS_US]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
313    [INTEL_PERF_COUNTER_UNITS_PIXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
314    [INTEL_PERF_COUNTER_UNITS_TEXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
315    [INTEL_PERF_COUNTER_UNITS_THREADS]                              = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
316    [INTEL_PERF_COUNTER_UNITS_PERCENT]                              = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
317    [INTEL_PERF_COUNTER_UNITS_MESSAGES]                             = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
318    [INTEL_PERF_COUNTER_UNITS_NUMBER]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
319    [INTEL_PERF_COUNTER_UNITS_CYCLES]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
320    [INTEL_PERF_COUNTER_UNITS_EVENTS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
321    [INTEL_PERF_COUNTER_UNITS_UTILIZATION]                          = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
322    [INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
323    [INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
324    [INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES]        = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
325    [INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
326 };
327 
328 static const VkPerformanceCounterStorageKHR
329 intel_perf_counter_data_type_to_vk_storage[] = {
330    [INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
331    [INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
332    [INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
333    [INTEL_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
334    [INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
335 };
336 
anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(VkPhysicalDevice physicalDevice,uint32_t queueFamilyIndex,uint32_t * pCounterCount,VkPerformanceCounterKHR * pCounters,VkPerformanceCounterDescriptionKHR * pCounterDescriptions)337 VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
338     VkPhysicalDevice                            physicalDevice,
339     uint32_t                                    queueFamilyIndex,
340     uint32_t*                                   pCounterCount,
341     VkPerformanceCounterKHR*                    pCounters,
342     VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
343 {
344    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
345    struct intel_perf_config *perf = pdevice->perf;
346 
347    uint32_t desc_count = *pCounterCount;
348 
349    VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
350    VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
351                           pCounterDescriptions, &desc_count);
352 
353    for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
354       const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
355 
356       vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
357          counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
358          counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
359          counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
360 
361          unsigned char sha1_result[20];
362          _mesa_sha1_compute(intel_counter->symbol_name,
363                             strlen(intel_counter->symbol_name),
364                             sha1_result);
365          memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
366       }
367 
368       vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
369          desc->flags = 0; /* None so far. */
370          snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
371          snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
372          snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
373       }
374    }
375 
376    return vk_outarray_status(&out);
377 }
378 
/* Entry point for vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR.
 *
 * Writes to *pNumPasses the value intel_perf_get_n_passes() returns for the
 * requested counter indices, or 0 when the physical device has no perf
 * configuration.
 */
void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice                            physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   *pNumPasses = perf == NULL ? 0 :
      intel_perf_get_n_passes(perf,
                              pPerformanceQueryCreateInfo->pCounterIndices,
                              pPerformanceQueryCreateInfo->counterIndexCount,
                              NULL);
}
397 
anv_AcquireProfilingLockKHR(VkDevice _device,const VkAcquireProfilingLockInfoKHR * pInfo)398 VkResult anv_AcquireProfilingLockKHR(
399     VkDevice                                    _device,
400     const VkAcquireProfilingLockInfoKHR*        pInfo)
401 {
402    ANV_FROM_HANDLE(anv_device, device, _device);
403    struct intel_perf_config *perf = device->physical->perf;
404    struct intel_perf_query_info *first_metric_set = &perf->queries[0];
405    int fd = -1;
406 
407    assert(device->perf_fd == -1);
408 
409    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
410       fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
411       if (fd < 0)
412          return VK_TIMEOUT;
413    }
414 
415    device->perf_fd = fd;
416    return VK_SUCCESS;
417 }
418 
/* Entry point for vkReleaseProfilingLockKHR.
 *
 * Closes the i915-perf stream (it is only expected to exist when
 * DEBUG_NO_OACONFIG is not set) and resets perf_fd to -1.
 */
void anv_ReleaseProfilingLockKHR(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   const bool has_stream = !INTEL_DEBUG(DEBUG_NO_OACONFIG);

   if (has_stream) {
      assert(device->perf_fd >= 0);
      close(device->perf_fd);
   }

   device->perf_fd = -1;
}
430 
431 void
anv_perf_write_pass_results(struct intel_perf_config * perf,struct anv_query_pool * pool,uint32_t pass,const struct intel_perf_query_result * accumulated_results,union VkPerformanceCounterResultKHR * results)432 anv_perf_write_pass_results(struct intel_perf_config *perf,
433                             struct anv_query_pool *pool, uint32_t pass,
434                             const struct intel_perf_query_result *accumulated_results,
435                             union VkPerformanceCounterResultKHR *results)
436 {
437    for (uint32_t c = 0; c < pool->n_counters; c++) {
438       const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
439 
440       if (counter_pass->pass != pass)
441          continue;
442 
443       switch (pool->pass_query[pass]->kind) {
444       case INTEL_PERF_QUERY_TYPE_PIPELINE: {
445          assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
446          uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
447          results[c].uint64 = accumulated_results->accumulator[accu_offset];
448          break;
449       }
450 
451       case INTEL_PERF_QUERY_TYPE_OA:
452       case INTEL_PERF_QUERY_TYPE_RAW:
453          switch (counter_pass->counter->data_type) {
454          case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
455             results[c].uint64 =
456                counter_pass->counter->oa_counter_read_uint64(perf,
457                                                              counter_pass->query,
458                                                              accumulated_results);
459             break;
460          case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
461             results[c].float32 =
462                counter_pass->counter->oa_counter_read_float(perf,
463                                                             counter_pass->query,
464                                                             accumulated_results);
465             break;
466          default:
467             /* So far we aren't using uint32, double or bool32... */
468             unreachable("unexpected counter data type");
469          }
470          break;
471 
472       default:
473          unreachable("invalid query type");
474       }
475 
476       /* The Vulkan extension only has nanoseconds as a unit */
477       if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
478          assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
479          results[c].uint64 *= 1000;
480       }
481    }
482 }
483