1 /*
2  * Copyright (C) 2020-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "level_zero/tools/source/metrics/metric_query_imp.h"
9 
10 #include "shared/source/command_stream/command_stream_receiver.h"
11 #include "shared/source/device/device.h"
12 #include "shared/source/helpers/engine_node_helper.h"
13 #include "shared/source/memory_manager/allocation_properties.h"
14 #include "shared/source/memory_manager/memory_manager.h"
15 #include "shared/source/os_interface/os_context.h"
16 #include "shared/source/os_interface/os_library.h"
17 
18 #include "level_zero/core/source/cmdlist/cmdlist.h"
19 #include "level_zero/core/source/cmdlist/cmdlist_imp.h"
20 #include "level_zero/core/source/device/device.h"
21 #include "level_zero/core/source/device/device_imp.h"
22 #include "level_zero/tools/source/metrics/metric_enumeration_imp.h"
23 
24 using namespace MetricsLibraryApi;
25 
26 namespace L0 {
27 
MetricsLibrary(MetricContext & metricContextInput)28 MetricsLibrary::MetricsLibrary(MetricContext &metricContextInput)
29     : metricContext(metricContextInput) {}
30 
~MetricsLibrary()31 MetricsLibrary::~MetricsLibrary() {
32     release();
33 }
34 
getInitializationState()35 ze_result_t MetricsLibrary::getInitializationState() {
36     return initializationState;
37 }
38 
isInitialized()39 bool MetricsLibrary::isInitialized() {
40     // Try to initialize metrics library only once.
41     if (initializationState == ZE_RESULT_ERROR_UNINITIALIZED) {
42         initialize();
43     }
44 
45     return initializationState == ZE_RESULT_SUCCESS;
46 }
47 
getQueryReportGpuSize()48 uint32_t MetricsLibrary::getQueryReportGpuSize() {
49 
50     TypedValue_1_0 gpuReportSize = {};
51 
52     // Obtain gpu report size.
53     if (!isInitialized() ||
54         api.GetParameter(ParameterType::QueryHwCountersReportGpuSize, &gpuReportSize.Type, &gpuReportSize) != StatusCode::Success) {
55 
56         DEBUG_BREAK_IF(true);
57         return 0;
58     }
59 
60     // Validate gpu report size.
61     if (!gpuReportSize.ValueUInt32) {
62         DEBUG_BREAK_IF(true);
63         return 0;
64     }
65 
66     return gpuReportSize.ValueUInt32;
67 }
68 
createMetricQuery(const uint32_t slotsCount,QueryHandle_1_0 & query,NEO::GraphicsAllocation * & pAllocation)69 bool MetricsLibrary::createMetricQuery(const uint32_t slotsCount, QueryHandle_1_0 &query,
70                                        NEO::GraphicsAllocation *&pAllocation) {
71 
72     std::lock_guard<std::mutex> lock(mutex);
73 
74     // Validate metrics library state.
75     if (!isInitialized()) {
76         DEBUG_BREAK_IF(true);
77         return false;
78     }
79 
80     QueryCreateData_1_0 queryData = {};
81     queryData.HandleContext = context;
82     queryData.Type = ObjectType::QueryHwCounters;
83     queryData.Slots = slotsCount;
84 
85     // Create query pool within metrics library.
86     if (api.QueryCreate(&queryData, &query) != StatusCode::Success) {
87         DEBUG_BREAK_IF(true);
88         return false;
89     }
90 
91     // Register created query.
92     queries.push_back(query);
93 
94     return true;
95 }
96 
getMetricQueryCount()97 uint32_t MetricsLibrary::getMetricQueryCount() {
98     std::lock_guard<std::mutex> lock(mutex);
99     return static_cast<uint32_t>(queries.size());
100 }
101 
destroyMetricQuery(QueryHandle_1_0 & query)102 bool MetricsLibrary::destroyMetricQuery(QueryHandle_1_0 &query) {
103     std::lock_guard<std::mutex> lock(mutex);
104     DEBUG_BREAK_IF(!query.IsValid());
105 
106     const bool result = isInitialized() && (api.QueryDelete(query) == StatusCode::Success);
107     auto iter = std::find_if(queries.begin(), queries.end(), [&](const QueryHandle_1_0 &element) { return element.data == query.data; });
108 
109     // Unregister query.
110     if (iter != queries.end()) {
111         queries.erase(iter);
112     }
113 
114     return result;
115 }
116 
getMetricQueryReportSize(size_t & rawDataSize)117 bool MetricsLibrary::getMetricQueryReportSize(size_t &rawDataSize) {
118     ValueType valueType = ValueType::Last;
119     TypedValue_1_0 value = {};
120 
121     const bool result = isInitialized() && (api.GetParameter(ParameterType::QueryHwCountersReportApiSize, &valueType, &value) == StatusCode::Success);
122     rawDataSize = static_cast<size_t>(value.ValueUInt32);
123     DEBUG_BREAK_IF(!result);
124     return result;
125 }
126 
getMetricQueryReport(QueryHandle_1_0 & query,const uint32_t slot,const size_t rawDataSize,uint8_t * pData)127 bool MetricsLibrary::getMetricQueryReport(QueryHandle_1_0 &query, const uint32_t slot,
128                                           const size_t rawDataSize, uint8_t *pData) {
129 
130     GetReportData_1_0 report = {};
131     report.Type = ObjectType::QueryHwCounters;
132     report.Query.Handle = query;
133     report.Query.Slot = slot;
134     report.Query.SlotsCount = 1;
135     report.Query.Data = pData;
136     report.Query.DataSize = static_cast<uint32_t>(rawDataSize);
137 
138     const bool result = isInitialized() && (api.GetData(&report) == StatusCode::Success);
139     DEBUG_BREAK_IF(!result);
140     return result;
141 }
142 
initialize()143 void MetricsLibrary::initialize() {
144     auto &metricsEnumeration = metricContext.getMetricEnumeration();
145 
146     // Function should be called only once.
147     DEBUG_BREAK_IF(initializationState != ZE_RESULT_ERROR_UNINITIALIZED);
148 
149     // Metrics Enumeration needs to be initialized before Metrics Library
150     const bool validMetricsEnumeration = metricsEnumeration.isInitialized();
151     const bool validMetricsLibrary = validMetricsEnumeration && handle && createContext();
152 
153     // Load metrics library and exported functions.
154     initializationState = validMetricsLibrary ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
155     DEBUG_BREAK_IF(initializationState != ZE_RESULT_SUCCESS);
156 }
157 
release()158 void MetricsLibrary::release() {
159 
160     // Delete metric group configurations.
161     deleteAllConfigurations();
162 
163     // Destroy context.
164     if (context.IsValid() && contextDeleteFunction) {
165         contextDeleteFunction(context);
166     }
167 
168     // Reset metric query state to not initialized.
169     api = {};
170     callbacks = {};
171     context = {};
172     isWorkloadPartitionEnabled = false;
173     initializationState = ZE_RESULT_ERROR_UNINITIALIZED;
174 }
175 
load()176 bool MetricsLibrary::load() {
177     // Load library.
178     handle = NEO::OsLibrary::load(getFilename());
179 
180     // Load exported functions.
181     if (handle) {
182         contextCreateFunction = reinterpret_cast<ContextCreateFunction_1_0>(
183             handle->getProcAddress(METRICS_LIBRARY_CONTEXT_CREATE_1_0));
184         contextDeleteFunction = reinterpret_cast<ContextDeleteFunction_1_0>(
185             handle->getProcAddress(METRICS_LIBRARY_CONTEXT_DELETE_1_0));
186     }
187 
188     if (contextCreateFunction == nullptr || contextDeleteFunction == nullptr) {
189         PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "cannot load %s exported functions\n", MetricsLibrary::getFilename());
190         return false;
191     }
192 
193     // Return success if exported functions have been loaded.
194     return true;
195 }
196 
enableWorkloadPartition()197 void MetricsLibrary::enableWorkloadPartition() {
198     isWorkloadPartitionEnabled = true;
199 }
200 
getSubDeviceClientOptions(ClientOptionsData_1_0 & subDevice,ClientOptionsData_1_0 & subDeviceIndex,ClientOptionsData_1_0 & subDeviceCount,ClientOptionsData_1_0 & workloadPartition)201 void MetricsLibrary::getSubDeviceClientOptions(
202     ClientOptionsData_1_0 &subDevice,
203     ClientOptionsData_1_0 &subDeviceIndex,
204     ClientOptionsData_1_0 &subDeviceCount,
205     ClientOptionsData_1_0 &workloadPartition) {
206 
207     const auto &deviceImp = *static_cast<DeviceImp *>(&metricContext.getDevice());
208 
209     if (!deviceImp.isSubdevice) {
210 
211         // Root device.
212         subDevice.Type = ClientOptionsType::SubDevice;
213         subDevice.SubDevice.Enabled = false;
214 
215         subDeviceIndex.Type = ClientOptionsType::SubDeviceIndex;
216         subDeviceIndex.SubDeviceIndex.Index = 0;
217 
218         subDeviceCount.Type = ClientOptionsType::SubDeviceCount;
219         subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.getNEODevice()->getRootDevice()->getNumSubDevices(), 1u);
220 
221         workloadPartition.Type = ClientOptionsType::WorkloadPartition;
222         workloadPartition.WorkloadPartition.Enabled = false;
223 
224     } else {
225 
226         // Sub device.
227         subDevice.Type = ClientOptionsType::SubDevice;
228         subDevice.SubDevice.Enabled = true;
229 
230         subDeviceIndex.Type = ClientOptionsType::SubDeviceIndex;
231         subDeviceIndex.SubDeviceIndex.Index = static_cast<NEO::SubDevice *>(deviceImp.getNEODevice())->getSubDeviceIndex();
232 
233         subDeviceCount.Type = ClientOptionsType::SubDeviceCount;
234         subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.getNEODevice()->getRootDevice()->getNumSubDevices(), 1u);
235 
236         workloadPartition.Type = ClientOptionsType::WorkloadPartition;
237         workloadPartition.WorkloadPartition.Enabled = isWorkloadPartitionEnabled;
238     }
239 }
240 
createContext()241 bool MetricsLibrary::createContext() {
242     auto &device = metricContext.getDevice();
243     const auto &hwHelper = device.getHwHelper();
244     const auto &asyncComputeEngines = hwHelper.getGpgpuEngineInstances(device.getHwInfo());
245     ContextCreateData_1_0 createData = {};
246     ClientOptionsData_1_0 clientOptions[6] = {};
247     ClientData_1_0 clientData = {};
248     ClientType_1_0 clientType = {};
249     ClientDataLinuxAdapter_1_0 adapter = {};
250 
251     // Check if compute command streamer is used.
252     auto asyncComputeEngine = std::find_if(asyncComputeEngines.begin(), asyncComputeEngines.end(), [&](const auto &engine) {
253         return engine.first == aub_stream::ENGINE_CCS;
254     });
255 
256     const auto &deviceImp = *static_cast<DeviceImp *>(&device);
257     const auto &commandStreamReceiver = *deviceImp.getNEODevice()->getDefaultEngine().commandStreamReceiver;
258     const auto engineType = commandStreamReceiver.getOsContext().getEngineType();
259     const bool isComputeUsed = NEO::EngineHelpers::isCcs(engineType);
260 
261     metricContext.setUseCompute(isComputeUsed);
262 
263     // Create metrics library context.
264     DEBUG_BREAK_IF(!contextCreateFunction);
265     clientType.Api = ClientApi::OneApi;
266     clientType.Gen = getGenType(device.getPlatformInfo());
267 
268     clientOptions[0].Type = ClientOptionsType::Compute;
269     clientOptions[0].Compute.Asynchronous = asyncComputeEngine != asyncComputeEngines.end();
270 
271     clientOptions[1].Type = ClientOptionsType::Tbs;
272     clientOptions[1].Tbs.Enabled = metricContext.getMetricStreamer() != nullptr;
273 
274     // Sub device client options #2
275     getSubDeviceClientOptions(clientOptions[2], clientOptions[3], clientOptions[4], clientOptions[5]);
276 
277     clientData.Linux.Adapter = &adapter;
278     clientData.ClientOptions = clientOptions;
279     clientData.ClientOptionsCount = sizeof(clientOptions) / sizeof(ClientOptionsData_1_0);
280 
281     createData.Api = &api;
282     createData.ClientCallbacks = &callbacks;
283     createData.ClientData = &clientData;
284 
285     const bool result =
286         getContextData(device, createData) &&
287         contextCreateFunction(clientType, &createData, &context) == StatusCode::Success;
288 
289     DEBUG_BREAK_IF(!result);
290     return result;
291 }
292 
getGenType(const uint32_t gen) const293 ClientGen MetricsLibrary::getGenType(const uint32_t gen) const {
294     auto &hwHelper = NEO::HwHelper::get(static_cast<GFXCORE_FAMILY>(gen));
295     return static_cast<MetricsLibraryApi::ClientGen>(hwHelper.getMetricsLibraryGenId());
296 }
297 
getGpuCommandsSize(CommandBufferData_1_0 & commandBuffer)298 uint32_t MetricsLibrary::getGpuCommandsSize(CommandBufferData_1_0 &commandBuffer) {
299     CommandBufferSize_1_0 commandBufferSize = {};
300 
301     bool result = isInitialized();
302 
303     // Validate metrics library initialization state.
304     if (result) {
305         commandBuffer.HandleContext = context;
306         result = api.CommandBufferGetSize(&commandBuffer, &commandBufferSize) == StatusCode::Success;
307     }
308 
309     DEBUG_BREAK_IF(!result);
310     return result ? commandBufferSize.GpuMemorySize : 0;
311 }
312 
getGpuCommands(CommandBufferData_1_0 & commandBuffer)313 bool MetricsLibrary::getGpuCommands(CommandBufferData_1_0 &commandBuffer) {
314 
315     // Obtain gpu commands from metrics library.
316     const bool result =
317         isInitialized() && (api.CommandBufferGet(&commandBuffer) == StatusCode::Success);
318     DEBUG_BREAK_IF(!result);
319     return result;
320 }
321 
getGpuCommands(CommandList & commandList,CommandBufferData_1_0 & commandBuffer)322 bool MetricsLibrary::getGpuCommands(CommandList &commandList,
323                                     CommandBufferData_1_0 &commandBuffer) {
324 
325     // Obtain required command buffer size.
326     commandBuffer.Size = getGpuCommandsSize(commandBuffer);
327 
328     // Validate gpu commands size.
329     if (!commandBuffer.Size) {
330         DEBUG_BREAK_IF(true);
331         return false;
332     }
333 
334     // Allocate command buffer.
335     auto stream = commandList.commandContainer.getCommandStream();
336     auto buffer = stream->getSpace(commandBuffer.Size);
337 
338     // Fill attached command buffer with gpu commands.
339     commandBuffer.Data = buffer;
340 
341     // Obtain gpu commands from metrics library.
342     const bool result =
343         isInitialized() && (api.CommandBufferGet(&commandBuffer) == StatusCode::Success);
344     DEBUG_BREAK_IF(!result);
345     return result;
346 }
347 
348 ConfigurationHandle_1_0
createConfiguration(const zet_metric_group_handle_t metricGroupHandle,const zet_metric_group_properties_t properties)349 MetricsLibrary::createConfiguration(const zet_metric_group_handle_t metricGroupHandle,
350                                     const zet_metric_group_properties_t properties) {
351     // Metric group internal data.
352     auto metricGroup = MetricGroup::fromHandle(metricGroupHandle);
353     auto metricGroupDummy = ConfigurationHandle_1_0{};
354     DEBUG_BREAK_IF(!metricGroup);
355 
356     // Metrics library configuration creation data.
357     ConfigurationHandle_1_0 handle = {};
358     ConfigurationCreateData_1_0 handleData = {};
359     handleData.HandleContext = context;
360     handleData.Type = ObjectType::ConfigurationHwCountersOa;
361 
362     // Check supported sampling types.
363     const bool validSampling =
364         properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED ||
365         properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED;
366 
367     // Activate metric group through metrics discovery to send metric group
368     // configuration to kernel driver.
369     const bool validActivate = isInitialized() && validSampling && metricGroup->activate();
370 
371     if (validActivate) {
372         // Use metrics library to create configuration for the activated metric group.
373         api.ConfigurationCreate(&handleData, &handle);
374 
375         // Use metrics discovery to deactivate metric group.
376         metricGroup->deactivate();
377     }
378 
379     return validActivate ? handle : metricGroupDummy;
380 }
381 
getConfiguration(zet_metric_group_handle_t handle)382 ConfigurationHandle_1_0 MetricsLibrary::getConfiguration(zet_metric_group_handle_t handle) {
383 
384     auto iter = configurations.find(handle);
385     auto configuration = (iter != end(configurations)) ? iter->second : addConfiguration(handle);
386 
387     DEBUG_BREAK_IF(!configuration.IsValid());
388     return configuration;
389 }
390 
addConfiguration(zet_metric_group_handle_t handle)391 ConfigurationHandle_1_0 MetricsLibrary::addConfiguration(zet_metric_group_handle_t handle) {
392     ConfigurationHandle_1_0 libraryHandle = {};
393     DEBUG_BREAK_IF(!handle);
394 
395     // Create metrics library configuration.
396     auto metricGroup = MetricGroup::fromHandle(handle);
397     auto properties = MetricGroup::getProperties(handle);
398     auto configuration = createConfiguration(metricGroup, properties);
399 
400     // Cache configuration if valid.
401     if (configuration.IsValid()) {
402         libraryHandle = configuration;
403         cacheConfiguration(handle, libraryHandle);
404     }
405 
406     DEBUG_BREAK_IF(!libraryHandle.IsValid());
407     return libraryHandle;
408 }
409 
deleteAllConfigurations()410 void MetricsLibrary::deleteAllConfigurations() {
411 
412     if (api.ConfigurationDelete) {
413         for (auto &configuration : configurations) {
414             if (configuration.second.IsValid()) {
415                 api.ConfigurationDelete(configuration.second);
416             }
417         }
418     }
419 
420     configurations.clear();
421 }
422 
metricQueryPoolCreate(zet_context_handle_t hContext,zet_device_handle_t hDevice,zet_metric_group_handle_t hMetricGroup,const zet_metric_query_pool_desc_t * pDesc,zet_metric_query_pool_handle_t * phMetricQueryPool)423 ze_result_t metricQueryPoolCreate(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup,
424                                   const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool) {
425 
426     auto device = Device::fromHandle(hDevice);
427     auto &metricContext = device->getMetricContext();
428 
429     // Metric query cannot be used with streamer simultaneously
430     // (due to oa buffer usage constraints).
431     if (metricContext.getMetricStreamer() != nullptr) {
432         return ZE_RESULT_ERROR_NOT_AVAILABLE;
433     }
434 
435     const auto &deviceImp = *static_cast<DeviceImp *>(device);
436     auto metricPoolImp = new MetricQueryPoolImp(device->getMetricContext(), hMetricGroup, *pDesc);
437 
438     if (metricContext.isImplicitScalingCapable()) {
439 
440         auto emptyMetricGroups = std::vector<zet_metric_group_handle_t>();
441         auto &metricGroups = hMetricGroup
442                                  ? static_cast<MetricGroupImp *>(MetricGroup::fromHandle(hMetricGroup))->getMetricGroups()
443                                  : emptyMetricGroups;
444 
445         const bool useMetricGroupSubDevice = metricGroups.size() > 0;
446 
447         auto &metricPools = metricPoolImp->getMetricQueryPools();
448 
449         for (size_t i = 0; i < deviceImp.numSubDevices; ++i) {
450 
451             auto &subDevice = deviceImp.subDevices[i];
452             auto &subDeviceMetricContext = subDevice->getMetricContext();
453 
454             subDeviceMetricContext.getMetricsLibrary().enableWorkloadPartition();
455 
456             zet_metric_group_handle_t metricGroupHandle = useMetricGroupSubDevice
457                                                               ? metricGroups[subDeviceMetricContext.getSubDeviceIndex()]
458                                                               : hMetricGroup;
459 
460             auto metricPoolSubdeviceImp = new MetricQueryPoolImp(subDeviceMetricContext, metricGroupHandle, *pDesc);
461 
462             // Create metric query pool.
463             if (!metricPoolSubdeviceImp->create()) {
464                 metricPoolSubdeviceImp->destroy();
465                 metricPoolImp->destroy();
466                 metricPoolSubdeviceImp = nullptr;
467                 metricPoolImp = nullptr;
468                 *phMetricQueryPool = nullptr;
469                 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
470             }
471 
472             metricPools.push_back(metricPoolSubdeviceImp);
473         }
474 
475     } else {
476 
477         // Create metric query pool.
478         if (!metricPoolImp->create()) {
479             metricPoolImp->destroy();
480             metricPoolImp = nullptr;
481             *phMetricQueryPool = nullptr;
482             return ZE_RESULT_ERROR_INVALID_ARGUMENT;
483         }
484     }
485 
486     // Allocate gpu memory.
487     if (!metricPoolImp->allocateGpuMemory()) {
488         metricPoolImp->destroy();
489         metricPoolImp = nullptr;
490         *phMetricQueryPool = nullptr;
491         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
492     }
493 
494     *phMetricQueryPool = metricPoolImp;
495 
496     return ZE_RESULT_SUCCESS;
497 }
498 
MetricQueryPoolImp(MetricContext & metricContextInput,zet_metric_group_handle_t hEventMetricGroupInput,const zet_metric_query_pool_desc_t & poolDescription)499 MetricQueryPoolImp::MetricQueryPoolImp(MetricContext &metricContextInput,
500                                        zet_metric_group_handle_t hEventMetricGroupInput,
501                                        const zet_metric_query_pool_desc_t &poolDescription)
502     : metricContext(metricContextInput), metricsLibrary(metricContext.getMetricsLibrary()),
503       description(poolDescription),
504       hMetricGroup(hEventMetricGroupInput) {}
505 
create()506 bool MetricQueryPoolImp::create() {
507     switch (description.type) {
508     case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE:
509         return createMetricQueryPool();
510     case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION:
511         return createSkipExecutionQueryPool();
512     default:
513         DEBUG_BREAK_IF(true);
514         return false;
515     }
516 }
517 
destroy()518 ze_result_t MetricQueryPoolImp::destroy() {
519     switch (description.type) {
520     case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE:
521         if (metricQueryPools.size() > 0) {
522             for (auto &metricQueryPool : metricQueryPools) {
523                 MetricQueryPool::fromHandle(metricQueryPool)->destroy();
524             }
525         }
526         if (query.IsValid()) {
527             metricsLibrary.destroyMetricQuery(query);
528         }
529         if (pAllocation) {
530             metricContext.getDevice().getDriverHandle()->getMemoryManager()->freeGraphicsMemory(pAllocation);
531         }
532         break;
533     case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION:
534         for (auto &metricQueryPool : metricQueryPools) {
535             MetricQueryPool::fromHandle(metricQueryPool)->destroy();
536         }
537         break;
538     default:
539         DEBUG_BREAK_IF(true);
540         break;
541     }
542 
543     // Check open queries.
544     if (metricContext.getMetricsLibrary().getMetricQueryCount() == 0) {
545         if (!metricContext.isMetricGroupActivated()) {
546             metricContext.getMetricsLibrary().release();
547         }
548     }
549 
550     delete this;
551 
552     return ZE_RESULT_SUCCESS;
553 }
554 
allocateGpuMemory()555 bool MetricQueryPoolImp::allocateGpuMemory() {
556 
557     if (description.type == ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE) {
558         // Get allocation size.
559         const auto &deviceImp = *static_cast<DeviceImp *>(&metricContext.getDevice());
560         allocationSize = (metricContext.isImplicitScalingCapable())
561                              ? deviceImp.subDevices[0]->getMetricContext().getMetricsLibrary().getQueryReportGpuSize() * description.count * deviceImp.numSubDevices
562                              : metricsLibrary.getQueryReportGpuSize() * description.count;
563 
564         if (allocationSize == 0) {
565             return false;
566         }
567 
568         // Allocate gpu memory.
569         NEO::AllocationProperties properties(
570             metricContext.getDevice().getRootDeviceIndex(), allocationSize, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, metricContext.getDevice().getNEODevice()->getDeviceBitfield());
571         properties.alignment = 64u;
572         pAllocation = metricContext.getDevice().getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
573 
574         UNRECOVERABLE_IF(pAllocation == nullptr);
575 
576         // Clear allocation.
577         memset(pAllocation->getUnderlyingBuffer(), 0, allocationSize);
578     }
579     return true;
580 }
581 
createMetricQueryPool()582 bool MetricQueryPoolImp::createMetricQueryPool() {
583     // Validate metric group query - only event based is supported.
584     auto metricGroupProperites = MetricGroup::getProperties(hMetricGroup);
585     const bool validMetricGroup = metricGroupProperites.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED;
586 
587     if (!validMetricGroup) {
588         return false;
589     }
590 
591     // Pool initialization.
592     pool.reserve(description.count);
593     for (uint32_t i = 0; i < description.count; ++i) {
594         pool.push_back({metricContext, *this, i});
595     }
596 
597     // Metrics library query object initialization.
598     return metricsLibrary.createMetricQuery(description.count, query, pAllocation);
599 }
600 
createSkipExecutionQueryPool()601 bool MetricQueryPoolImp::createSkipExecutionQueryPool() {
602 
603     pool.reserve(description.count);
604     for (uint32_t i = 0; i < description.count; ++i) {
605         pool.push_back({metricContext, *this, i});
606     }
607 
608     return true;
609 }
610 
fromHandle(zet_metric_query_pool_handle_t handle)611 MetricQueryPool *MetricQueryPool::fromHandle(zet_metric_query_pool_handle_t handle) {
612     return static_cast<MetricQueryPool *>(handle);
613 }
614 
toHandle()615 zet_metric_query_pool_handle_t MetricQueryPool::toHandle() { return this; }
616 
createMetricQuery(uint32_t index,zet_metric_query_handle_t * phMetricQuery)617 ze_result_t MetricQueryPoolImp::createMetricQuery(uint32_t index,
618                                                   zet_metric_query_handle_t *phMetricQuery) {
619 
620     if (index >= description.count) {
621         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
622     }
623 
624     if (metricQueryPools.size() > 0) {
625 
626         auto pMetricQueryImp = new MetricQueryImp(metricContext, *this, index);
627 
628         for (auto metricQueryPoolHandle : metricQueryPools) {
629             auto &metricQueries = pMetricQueryImp->getMetricQueries();
630             auto metricQueryPoolImp = static_cast<MetricQueryPoolImp *>(MetricQueryPool::fromHandle(metricQueryPoolHandle));
631             metricQueries.push_back(&metricQueryPoolImp->pool[index]);
632         }
633 
634         *phMetricQuery = pMetricQueryImp;
635 
636         return ZE_RESULT_SUCCESS;
637 
638     } else {
639 
640         *phMetricQuery = &(pool[index]);
641 
642         return ZE_RESULT_SUCCESS;
643     }
644 }
645 
getMetricQueryPools()646 std::vector<zet_metric_query_pool_handle_t> &MetricQueryPoolImp::getMetricQueryPools() {
647     return metricQueryPools;
648 }
649 
MetricQueryImp(MetricContext & metricContextInput,MetricQueryPoolImp & poolInput,const uint32_t slotInput)650 MetricQueryImp::MetricQueryImp(MetricContext &metricContextInput, MetricQueryPoolImp &poolInput,
651                                const uint32_t slotInput)
652     : metricContext(metricContextInput), metricsLibrary(metricContext.getMetricsLibrary()),
653       pool(poolInput), slot(slotInput) {}
654 
appendBegin(CommandList & commandList)655 ze_result_t MetricQueryImp::appendBegin(CommandList &commandList) {
656     switch (pool.description.type) {
657     case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE:
658         return writeMetricQuery(commandList, nullptr, 0, nullptr, true);
659     case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION:
660         return writeSkipExecutionQuery(commandList, nullptr, 0, nullptr, true);
661     default:
662         DEBUG_BREAK_IF(true);
663         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
664     }
665 }
666 
appendEnd(CommandList & commandList,ze_event_handle_t hSignalEvent,uint32_t numWaitEvents,ze_event_handle_t * phWaitEvents)667 ze_result_t MetricQueryImp::appendEnd(CommandList &commandList, ze_event_handle_t hSignalEvent,
668                                       uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
669     switch (pool.description.type) {
670     case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE:
671         return writeMetricQuery(commandList, hSignalEvent, numWaitEvents, phWaitEvents, false);
672     case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION:
673         return writeSkipExecutionQuery(commandList, hSignalEvent, numWaitEvents, phWaitEvents, false);
674     default:
675         DEBUG_BREAK_IF(true);
676         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
677     }
678 }
679 
getData(size_t * pRawDataSize,uint8_t * pRawData)680 ze_result_t MetricQueryImp::getData(size_t *pRawDataSize, uint8_t *pRawData) {
681 
682     const bool calculateSizeOnly = *pRawDataSize == 0;
683     const size_t metricQueriesSize = metricQueries.size();
684     bool result = true;
685 
686     if (metricQueriesSize > 0) {
687 
688         if (calculateSizeOnly) {
689 
690             const size_t headerSize = sizeof(MetricGroupCalculateHeader);
691             const size_t rawDataOffsetsRequiredSize = sizeof(uint32_t) * metricQueriesSize;
692             const size_t rawDataSizesRequiredSize = sizeof(uint32_t) * metricQueriesSize;
693 
694             auto pMetricQueryImp = static_cast<MetricQueryImp *>(MetricQuery::fromHandle(metricQueries[0]));
695             result = pMetricQueryImp->metricsLibrary.getMetricQueryReportSize(*pRawDataSize);
696 
697             const size_t rawDataRequiredSize = *pRawDataSize * metricQueriesSize;
698 
699             *pRawDataSize = headerSize + rawDataOffsetsRequiredSize + rawDataSizesRequiredSize + rawDataRequiredSize;
700 
701         } else {
702 
703             MetricGroupCalculateHeader *pRawDataHeader = reinterpret_cast<MetricGroupCalculateHeader *>(pRawData);
704             pRawDataHeader->magic = MetricGroupCalculateHeader::magicValue;
705             pRawDataHeader->dataCount = static_cast<uint32_t>(metricQueriesSize);
706 
707             // Relative offsets in the header allow to move/copy the buffer.
708             pRawDataHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader);
709             pRawDataHeader->rawDataSizes = static_cast<uint32_t>(pRawDataHeader->rawDataOffsets + (sizeof(uint32_t) * metricQueriesSize));
710             pRawDataHeader->rawDataOffset = static_cast<uint32_t>(pRawDataHeader->rawDataSizes + (sizeof(uint32_t) * metricQueriesSize));
711 
712             const size_t sizePerSubDevice = (*pRawDataSize - pRawDataHeader->rawDataOffset) / metricQueriesSize;
713             DEBUG_BREAK_IF(sizePerSubDevice == 0);
714             *pRawDataSize = pRawDataHeader->rawDataOffset;
715 
716             uint32_t *pRawDataOffsetsUnpacked = reinterpret_cast<uint32_t *>(pRawData + pRawDataHeader->rawDataOffsets);
717             uint32_t *pRawDataSizesUnpacked = reinterpret_cast<uint32_t *>(pRawData + pRawDataHeader->rawDataSizes);
718             uint8_t *pRawDataUnpacked = reinterpret_cast<uint8_t *>(pRawData + pRawDataHeader->rawDataOffset);
719 
720             for (size_t i = 0; i < metricQueriesSize; ++i) {
721 
722                 size_t getDataSize = sizePerSubDevice;
723                 const uint32_t rawDataOffset = (i != 0) ? (pRawDataSizesUnpacked[i - 1] + pRawDataOffsetsUnpacked[i - 1]) : 0;
724                 auto pMetricQuery = MetricQuery::fromHandle(metricQueries[i]);
725                 ze_result_t tmpResult = pMetricQuery->getData(&getDataSize, pRawDataUnpacked + rawDataOffset);
726                 // Return at first error.
727                 if (tmpResult != ZE_RESULT_SUCCESS) {
728                     return tmpResult;
729                 }
730                 pRawDataSizesUnpacked[i] = static_cast<uint32_t>(getDataSize);
731                 pRawDataOffsetsUnpacked[i] = (i != 0) ? pRawDataOffsetsUnpacked[i - 1] + pRawDataSizesUnpacked[i] : 0;
732                 *pRawDataSize += getDataSize;
733             }
734         }
735 
736     } else {
737         result = calculateSizeOnly
738                      ? metricsLibrary.getMetricQueryReportSize(*pRawDataSize)
739                      : metricsLibrary.getMetricQueryReport(pool.query, slot, *pRawDataSize, pRawData);
740     }
741 
742     return result
743                ? ZE_RESULT_SUCCESS
744                : ZE_RESULT_ERROR_UNKNOWN;
745 }
746 
reset()747 ze_result_t MetricQueryImp::reset() {
748     return ZE_RESULT_SUCCESS;
749 }
750 
destroy()751 ze_result_t MetricQueryImp::destroy() {
752 
753     if (metricQueries.size() > 0) {
754         delete this;
755     }
756 
757     return ZE_RESULT_SUCCESS;
758 }
759 
getMetricQueries()760 std::vector<zet_metric_query_handle_t> &MetricQueryImp::getMetricQueries() {
761     return metricQueries;
762 }
763 
writeMetricQuery(CommandList & commandList,ze_event_handle_t hSignalEvent,uint32_t numWaitEvents,ze_event_handle_t * phWaitEvents,const bool begin)764 ze_result_t MetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_handle_t hSignalEvent,
765                                              uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
766                                              const bool begin) {
767 
768     bool result = true;
769     const bool writeCompletionEvent = hSignalEvent && !begin;
770     const size_t metricQueriesSize = metricQueries.size();
771 
772     // Make gpu allocation visible.
773     commandList.commandContainer.addToResidencyContainer(pool.pAllocation);
774 
775     // Wait for events before executing query.
776     commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents);
777 
778     if (metricQueriesSize) {
779 
780         const size_t allocationSizeForSubDevice = pool.allocationSize / metricQueriesSize;
781         static_cast<CommandListImp &>(commandList).appendMultiPartitionPrologue(static_cast<uint32_t>(allocationSizeForSubDevice));
782         void *buffer = nullptr;
783         bool gpuCommandStatus = true;
784 
785         // Revert iteration to be ensured that the last set of gpu commands overwrite the previous written sets of gpu commands,
786         // so only one of the sub-device contexts will be used to append to command list.
787         for (int32_t i = static_cast<int32_t>(metricQueriesSize - 1); i >= 0; --i) {
788 
789             // Adjust cpu and gpu addresses for each sub-device's query object.
790             uint64_t gpuAddress = pool.pAllocation->getGpuAddress() + (i * allocationSizeForSubDevice);
791             uint8_t *cpuAddress = static_cast<uint8_t *>(pool.pAllocation->getUnderlyingBuffer()) + (i * allocationSizeForSubDevice);
792 
793             auto &metricQueryImp = *static_cast<MetricQueryImp *>(MetricQuery::fromHandle(metricQueries[i]));
794             auto &metricLibrarySubDevice = metricQueryImp.metricsLibrary;
795             auto &metricContextSubDevice = metricQueryImp.metricContext;
796 
797             // Obtain gpu commands.
798             CommandBufferData_1_0 commandBuffer = {};
799             commandBuffer.CommandsType = ObjectType::QueryHwCounters;
800             commandBuffer.QueryHwCounters.Handle = metricQueryImp.pool.query;
801             commandBuffer.QueryHwCounters.Begin = begin;
802             commandBuffer.QueryHwCounters.Slot = slot;
803             commandBuffer.Allocation.GpuAddress = gpuAddress;
804             commandBuffer.Allocation.CpuAddress = cpuAddress;
805             commandBuffer.Type = metricContextSubDevice.isComputeUsed()
806                                      ? GpuCommandBufferType::Compute
807                                      : GpuCommandBufferType::Render;
808 
809             // Obtain required command buffer size.
810             commandBuffer.Size = metricLibrarySubDevice.getGpuCommandsSize(commandBuffer);
811 
812             // Validate gpu commands size.
813             if (!commandBuffer.Size) {
814                 return ZE_RESULT_ERROR_UNKNOWN;
815             }
816 
817             // Allocate command buffer only once.
818             if (buffer == nullptr) {
819                 auto stream = commandList.commandContainer.getCommandStream();
820                 buffer = stream->getSpace(commandBuffer.Size);
821             }
822 
823             // Fill attached command buffer with gpu commands.
824             commandBuffer.Data = buffer;
825 
826             // Obtain gpu commands from metrics library for each sub-device to update cpu and gpu addresses for
827             // each query object in metrics library, so that get data works properly.
828             gpuCommandStatus = metricLibrarySubDevice.getGpuCommands(commandBuffer);
829             if (!gpuCommandStatus) {
830                 break;
831             }
832         }
833         static_cast<CommandListImp &>(commandList).appendMultiPartitionEpilogue();
834         if (!gpuCommandStatus) {
835             return ZE_RESULT_ERROR_UNKNOWN;
836         }
837 
838         // Write gpu commands for sub device index 0.
839     } else {
840         // Obtain gpu commands.
841         CommandBufferData_1_0 commandBuffer = {};
842         commandBuffer.CommandsType = ObjectType::QueryHwCounters;
843         commandBuffer.QueryHwCounters.Handle = pool.query;
844         commandBuffer.QueryHwCounters.Begin = begin;
845         commandBuffer.QueryHwCounters.Slot = slot;
846         commandBuffer.Allocation.GpuAddress = pool.pAllocation->getGpuAddress();
847         commandBuffer.Allocation.CpuAddress = pool.pAllocation->getUnderlyingBuffer();
848         commandBuffer.Type = metricContext.isComputeUsed()
849                                  ? GpuCommandBufferType::Compute
850                                  : GpuCommandBufferType::Render;
851 
852         // Get query commands.
853         result = metricsLibrary.getGpuCommands(commandList, commandBuffer);
854     }
855 
856     // Write completion event.
857     if (result && writeCompletionEvent) {
858         result = commandList.appendSignalEvent(hSignalEvent) == ZE_RESULT_SUCCESS;
859     }
860 
861     return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
862 }
863 
writeSkipExecutionQuery(CommandList & commandList,ze_event_handle_t hSignalEvent,uint32_t numWaitEvents,ze_event_handle_t * phWaitEvents,const bool begin)864 ze_result_t MetricQueryImp::writeSkipExecutionQuery(CommandList &commandList, ze_event_handle_t hSignalEvent,
865                                                     uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
866                                                     const bool begin) {
867 
868     bool writeCompletionEvent = hSignalEvent && !begin;
869     bool result = false;
870 
871     // Obtain gpu commands.
872     CommandBufferData_1_0 commandBuffer = {};
873     commandBuffer.CommandsType = ObjectType::OverrideNullHardware;
874     commandBuffer.Override.Enable = begin;
875     commandBuffer.Type = metricContext.isComputeUsed()
876                              ? GpuCommandBufferType::Compute
877                              : GpuCommandBufferType::Render;
878 
879     // Wait for events before executing query.
880     zeCommandListAppendWaitOnEvents(commandList.toHandle(), numWaitEvents, phWaitEvents);
881 
882     // Get query commands.
883     result = metricsLibrary.getGpuCommands(commandList, commandBuffer);
884 
885     // Write completion event.
886     if (result && writeCompletionEvent) {
887         result = zeCommandListAppendSignalEvent(commandList.toHandle(), hSignalEvent) ==
888                  ZE_RESULT_SUCCESS;
889     }
890 
891     return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
892 }
893 
appendMemoryBarrier(CommandList & commandList)894 ze_result_t MetricQuery::appendMemoryBarrier(CommandList &commandList) {
895 
896     DeviceImp *pDeviceImp = static_cast<DeviceImp *>(commandList.device);
897 
898     if (pDeviceImp->metricContext->isImplicitScalingCapable()) {
899         // Use one of the sub-device contexts to append to command list.
900         pDeviceImp = static_cast<DeviceImp *>(pDeviceImp->subDevices[0]);
901     }
902 
903     auto &metricContext = pDeviceImp->getMetricContext();
904     auto &metricsLibrary = metricContext.getMetricsLibrary();
905 
906     // Obtain gpu commands.
907     CommandBufferData_1_0 commandBuffer = {};
908     commandBuffer.CommandsType = ObjectType::OverrideFlushCaches;
909     commandBuffer.Override.Enable = true;
910     commandBuffer.Type = metricContext.isComputeUsed()
911                              ? GpuCommandBufferType::Compute
912                              : GpuCommandBufferType::Render;
913 
914     return metricsLibrary.getGpuCommands(commandList, commandBuffer) ? ZE_RESULT_SUCCESS
915                                                                      : ZE_RESULT_ERROR_UNKNOWN;
916 }
917 
appendStreamerMarker(CommandList & commandList,zet_metric_streamer_handle_t hMetricStreamer,uint32_t value)918 ze_result_t MetricQuery::appendStreamerMarker(CommandList &commandList,
919                                               zet_metric_streamer_handle_t hMetricStreamer,
920                                               uint32_t value) {
921 
922     DeviceImp *pDeviceImp = static_cast<DeviceImp *>(commandList.device);
923 
924     if (pDeviceImp->metricContext->isImplicitScalingCapable()) {
925         // Use one of the sub-device contexts to append to command list.
926         pDeviceImp = static_cast<DeviceImp *>(pDeviceImp->subDevices[0]);
927         pDeviceImp->metricContext->getMetricsLibrary().enableWorkloadPartition();
928     }
929     auto &metricContext = pDeviceImp->getMetricContext();
930     auto &metricsLibrary = metricContext.getMetricsLibrary();
931 
932     const uint32_t streamerMarkerHighBitsShift = 25;
933 
934     // Obtain gpu commands.
935     CommandBufferData_1_0 commandBuffer = {};
936     commandBuffer.CommandsType = ObjectType::MarkerStreamUser;
937     commandBuffer.MarkerStreamUser.Value = value;
938     commandBuffer.MarkerStreamUser.Reserved = (value >> streamerMarkerHighBitsShift);
939     commandBuffer.Type = metricContext.isComputeUsed()
940                              ? GpuCommandBufferType::Compute
941                              : GpuCommandBufferType::Render;
942 
943     return metricsLibrary.getGpuCommands(commandList, commandBuffer) ? ZE_RESULT_SUCCESS
944                                                                      : ZE_RESULT_ERROR_UNKNOWN;
945 }
946 
fromHandle(zet_metric_query_handle_t handle)947 MetricQuery *MetricQuery::fromHandle(zet_metric_query_handle_t handle) {
948     return static_cast<MetricQuery *>(handle);
949 }
950 
toHandle()951 zet_metric_query_handle_t MetricQuery::toHandle() { return this; }
952 
953 } // namespace L0
954