1 /*
2 * Copyright (C) 2020-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "level_zero/tools/source/metrics/metric_query_imp.h"
9
10 #include "shared/source/command_stream/command_stream_receiver.h"
11 #include "shared/source/device/device.h"
12 #include "shared/source/helpers/engine_node_helper.h"
13 #include "shared/source/memory_manager/allocation_properties.h"
14 #include "shared/source/memory_manager/memory_manager.h"
15 #include "shared/source/os_interface/os_context.h"
16 #include "shared/source/os_interface/os_library.h"
17
18 #include "level_zero/core/source/cmdlist/cmdlist.h"
19 #include "level_zero/core/source/cmdlist/cmdlist_imp.h"
20 #include "level_zero/core/source/device/device.h"
21 #include "level_zero/core/source/device/device_imp.h"
22 #include "level_zero/tools/source/metrics/metric_enumeration_imp.h"
23
24 using namespace MetricsLibraryApi;
25
26 namespace L0 {
27
MetricsLibrary(MetricContext & metricContextInput)28 MetricsLibrary::MetricsLibrary(MetricContext &metricContextInput)
29 : metricContext(metricContextInput) {}
30
~MetricsLibrary()31 MetricsLibrary::~MetricsLibrary() {
32 release();
33 }
34
getInitializationState()35 ze_result_t MetricsLibrary::getInitializationState() {
36 return initializationState;
37 }
38
isInitialized()39 bool MetricsLibrary::isInitialized() {
40 // Try to initialize metrics library only once.
41 if (initializationState == ZE_RESULT_ERROR_UNINITIALIZED) {
42 initialize();
43 }
44
45 return initializationState == ZE_RESULT_SUCCESS;
46 }
47
getQueryReportGpuSize()48 uint32_t MetricsLibrary::getQueryReportGpuSize() {
49
50 TypedValue_1_0 gpuReportSize = {};
51
52 // Obtain gpu report size.
53 if (!isInitialized() ||
54 api.GetParameter(ParameterType::QueryHwCountersReportGpuSize, &gpuReportSize.Type, &gpuReportSize) != StatusCode::Success) {
55
56 DEBUG_BREAK_IF(true);
57 return 0;
58 }
59
60 // Validate gpu report size.
61 if (!gpuReportSize.ValueUInt32) {
62 DEBUG_BREAK_IF(true);
63 return 0;
64 }
65
66 return gpuReportSize.ValueUInt32;
67 }
68
createMetricQuery(const uint32_t slotsCount,QueryHandle_1_0 & query,NEO::GraphicsAllocation * & pAllocation)69 bool MetricsLibrary::createMetricQuery(const uint32_t slotsCount, QueryHandle_1_0 &query,
70 NEO::GraphicsAllocation *&pAllocation) {
71
72 std::lock_guard<std::mutex> lock(mutex);
73
74 // Validate metrics library state.
75 if (!isInitialized()) {
76 DEBUG_BREAK_IF(true);
77 return false;
78 }
79
80 QueryCreateData_1_0 queryData = {};
81 queryData.HandleContext = context;
82 queryData.Type = ObjectType::QueryHwCounters;
83 queryData.Slots = slotsCount;
84
85 // Create query pool within metrics library.
86 if (api.QueryCreate(&queryData, &query) != StatusCode::Success) {
87 DEBUG_BREAK_IF(true);
88 return false;
89 }
90
91 // Register created query.
92 queries.push_back(query);
93
94 return true;
95 }
96
getMetricQueryCount()97 uint32_t MetricsLibrary::getMetricQueryCount() {
98 std::lock_guard<std::mutex> lock(mutex);
99 return static_cast<uint32_t>(queries.size());
100 }
101
destroyMetricQuery(QueryHandle_1_0 & query)102 bool MetricsLibrary::destroyMetricQuery(QueryHandle_1_0 &query) {
103 std::lock_guard<std::mutex> lock(mutex);
104 DEBUG_BREAK_IF(!query.IsValid());
105
106 const bool result = isInitialized() && (api.QueryDelete(query) == StatusCode::Success);
107 auto iter = std::find_if(queries.begin(), queries.end(), [&](const QueryHandle_1_0 &element) { return element.data == query.data; });
108
109 // Unregister query.
110 if (iter != queries.end()) {
111 queries.erase(iter);
112 }
113
114 return result;
115 }
116
getMetricQueryReportSize(size_t & rawDataSize)117 bool MetricsLibrary::getMetricQueryReportSize(size_t &rawDataSize) {
118 ValueType valueType = ValueType::Last;
119 TypedValue_1_0 value = {};
120
121 const bool result = isInitialized() && (api.GetParameter(ParameterType::QueryHwCountersReportApiSize, &valueType, &value) == StatusCode::Success);
122 rawDataSize = static_cast<size_t>(value.ValueUInt32);
123 DEBUG_BREAK_IF(!result);
124 return result;
125 }
126
getMetricQueryReport(QueryHandle_1_0 & query,const uint32_t slot,const size_t rawDataSize,uint8_t * pData)127 bool MetricsLibrary::getMetricQueryReport(QueryHandle_1_0 &query, const uint32_t slot,
128 const size_t rawDataSize, uint8_t *pData) {
129
130 GetReportData_1_0 report = {};
131 report.Type = ObjectType::QueryHwCounters;
132 report.Query.Handle = query;
133 report.Query.Slot = slot;
134 report.Query.SlotsCount = 1;
135 report.Query.Data = pData;
136 report.Query.DataSize = static_cast<uint32_t>(rawDataSize);
137
138 const bool result = isInitialized() && (api.GetData(&report) == StatusCode::Success);
139 DEBUG_BREAK_IF(!result);
140 return result;
141 }
142
initialize()143 void MetricsLibrary::initialize() {
144 auto &metricsEnumeration = metricContext.getMetricEnumeration();
145
146 // Function should be called only once.
147 DEBUG_BREAK_IF(initializationState != ZE_RESULT_ERROR_UNINITIALIZED);
148
149 // Metrics Enumeration needs to be initialized before Metrics Library
150 const bool validMetricsEnumeration = metricsEnumeration.isInitialized();
151 const bool validMetricsLibrary = validMetricsEnumeration && handle && createContext();
152
153 // Load metrics library and exported functions.
154 initializationState = validMetricsLibrary ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
155 DEBUG_BREAK_IF(initializationState != ZE_RESULT_SUCCESS);
156 }
157
release()158 void MetricsLibrary::release() {
159
160 // Delete metric group configurations.
161 deleteAllConfigurations();
162
163 // Destroy context.
164 if (context.IsValid() && contextDeleteFunction) {
165 contextDeleteFunction(context);
166 }
167
168 // Reset metric query state to not initialized.
169 api = {};
170 callbacks = {};
171 context = {};
172 isWorkloadPartitionEnabled = false;
173 initializationState = ZE_RESULT_ERROR_UNINITIALIZED;
174 }
175
load()176 bool MetricsLibrary::load() {
177 // Load library.
178 handle = NEO::OsLibrary::load(getFilename());
179
180 // Load exported functions.
181 if (handle) {
182 contextCreateFunction = reinterpret_cast<ContextCreateFunction_1_0>(
183 handle->getProcAddress(METRICS_LIBRARY_CONTEXT_CREATE_1_0));
184 contextDeleteFunction = reinterpret_cast<ContextDeleteFunction_1_0>(
185 handle->getProcAddress(METRICS_LIBRARY_CONTEXT_DELETE_1_0));
186 }
187
188 if (contextCreateFunction == nullptr || contextDeleteFunction == nullptr) {
189 PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "cannot load %s exported functions\n", MetricsLibrary::getFilename());
190 return false;
191 }
192
193 // Return success if exported functions have been loaded.
194 return true;
195 }
196
enableWorkloadPartition()197 void MetricsLibrary::enableWorkloadPartition() {
198 isWorkloadPartitionEnabled = true;
199 }
200
getSubDeviceClientOptions(ClientOptionsData_1_0 & subDevice,ClientOptionsData_1_0 & subDeviceIndex,ClientOptionsData_1_0 & subDeviceCount,ClientOptionsData_1_0 & workloadPartition)201 void MetricsLibrary::getSubDeviceClientOptions(
202 ClientOptionsData_1_0 &subDevice,
203 ClientOptionsData_1_0 &subDeviceIndex,
204 ClientOptionsData_1_0 &subDeviceCount,
205 ClientOptionsData_1_0 &workloadPartition) {
206
207 const auto &deviceImp = *static_cast<DeviceImp *>(&metricContext.getDevice());
208
209 if (!deviceImp.isSubdevice) {
210
211 // Root device.
212 subDevice.Type = ClientOptionsType::SubDevice;
213 subDevice.SubDevice.Enabled = false;
214
215 subDeviceIndex.Type = ClientOptionsType::SubDeviceIndex;
216 subDeviceIndex.SubDeviceIndex.Index = 0;
217
218 subDeviceCount.Type = ClientOptionsType::SubDeviceCount;
219 subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.getNEODevice()->getRootDevice()->getNumSubDevices(), 1u);
220
221 workloadPartition.Type = ClientOptionsType::WorkloadPartition;
222 workloadPartition.WorkloadPartition.Enabled = false;
223
224 } else {
225
226 // Sub device.
227 subDevice.Type = ClientOptionsType::SubDevice;
228 subDevice.SubDevice.Enabled = true;
229
230 subDeviceIndex.Type = ClientOptionsType::SubDeviceIndex;
231 subDeviceIndex.SubDeviceIndex.Index = static_cast<NEO::SubDevice *>(deviceImp.getNEODevice())->getSubDeviceIndex();
232
233 subDeviceCount.Type = ClientOptionsType::SubDeviceCount;
234 subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.getNEODevice()->getRootDevice()->getNumSubDevices(), 1u);
235
236 workloadPartition.Type = ClientOptionsType::WorkloadPartition;
237 workloadPartition.WorkloadPartition.Enabled = isWorkloadPartitionEnabled;
238 }
239 }
240
createContext()241 bool MetricsLibrary::createContext() {
242 auto &device = metricContext.getDevice();
243 const auto &hwHelper = device.getHwHelper();
244 const auto &asyncComputeEngines = hwHelper.getGpgpuEngineInstances(device.getHwInfo());
245 ContextCreateData_1_0 createData = {};
246 ClientOptionsData_1_0 clientOptions[6] = {};
247 ClientData_1_0 clientData = {};
248 ClientType_1_0 clientType = {};
249 ClientDataLinuxAdapter_1_0 adapter = {};
250
251 // Check if compute command streamer is used.
252 auto asyncComputeEngine = std::find_if(asyncComputeEngines.begin(), asyncComputeEngines.end(), [&](const auto &engine) {
253 return engine.first == aub_stream::ENGINE_CCS;
254 });
255
256 const auto &deviceImp = *static_cast<DeviceImp *>(&device);
257 const auto &commandStreamReceiver = *deviceImp.getNEODevice()->getDefaultEngine().commandStreamReceiver;
258 const auto engineType = commandStreamReceiver.getOsContext().getEngineType();
259 const bool isComputeUsed = NEO::EngineHelpers::isCcs(engineType);
260
261 metricContext.setUseCompute(isComputeUsed);
262
263 // Create metrics library context.
264 DEBUG_BREAK_IF(!contextCreateFunction);
265 clientType.Api = ClientApi::OneApi;
266 clientType.Gen = getGenType(device.getPlatformInfo());
267
268 clientOptions[0].Type = ClientOptionsType::Compute;
269 clientOptions[0].Compute.Asynchronous = asyncComputeEngine != asyncComputeEngines.end();
270
271 clientOptions[1].Type = ClientOptionsType::Tbs;
272 clientOptions[1].Tbs.Enabled = metricContext.getMetricStreamer() != nullptr;
273
274 // Sub device client options #2
275 getSubDeviceClientOptions(clientOptions[2], clientOptions[3], clientOptions[4], clientOptions[5]);
276
277 clientData.Linux.Adapter = &adapter;
278 clientData.ClientOptions = clientOptions;
279 clientData.ClientOptionsCount = sizeof(clientOptions) / sizeof(ClientOptionsData_1_0);
280
281 createData.Api = &api;
282 createData.ClientCallbacks = &callbacks;
283 createData.ClientData = &clientData;
284
285 const bool result =
286 getContextData(device, createData) &&
287 contextCreateFunction(clientType, &createData, &context) == StatusCode::Success;
288
289 DEBUG_BREAK_IF(!result);
290 return result;
291 }
292
getGenType(const uint32_t gen) const293 ClientGen MetricsLibrary::getGenType(const uint32_t gen) const {
294 auto &hwHelper = NEO::HwHelper::get(static_cast<GFXCORE_FAMILY>(gen));
295 return static_cast<MetricsLibraryApi::ClientGen>(hwHelper.getMetricsLibraryGenId());
296 }
297
getGpuCommandsSize(CommandBufferData_1_0 & commandBuffer)298 uint32_t MetricsLibrary::getGpuCommandsSize(CommandBufferData_1_0 &commandBuffer) {
299 CommandBufferSize_1_0 commandBufferSize = {};
300
301 bool result = isInitialized();
302
303 // Validate metrics library initialization state.
304 if (result) {
305 commandBuffer.HandleContext = context;
306 result = api.CommandBufferGetSize(&commandBuffer, &commandBufferSize) == StatusCode::Success;
307 }
308
309 DEBUG_BREAK_IF(!result);
310 return result ? commandBufferSize.GpuMemorySize : 0;
311 }
312
getGpuCommands(CommandBufferData_1_0 & commandBuffer)313 bool MetricsLibrary::getGpuCommands(CommandBufferData_1_0 &commandBuffer) {
314
315 // Obtain gpu commands from metrics library.
316 const bool result =
317 isInitialized() && (api.CommandBufferGet(&commandBuffer) == StatusCode::Success);
318 DEBUG_BREAK_IF(!result);
319 return result;
320 }
321
getGpuCommands(CommandList & commandList,CommandBufferData_1_0 & commandBuffer)322 bool MetricsLibrary::getGpuCommands(CommandList &commandList,
323 CommandBufferData_1_0 &commandBuffer) {
324
325 // Obtain required command buffer size.
326 commandBuffer.Size = getGpuCommandsSize(commandBuffer);
327
328 // Validate gpu commands size.
329 if (!commandBuffer.Size) {
330 DEBUG_BREAK_IF(true);
331 return false;
332 }
333
334 // Allocate command buffer.
335 auto stream = commandList.commandContainer.getCommandStream();
336 auto buffer = stream->getSpace(commandBuffer.Size);
337
338 // Fill attached command buffer with gpu commands.
339 commandBuffer.Data = buffer;
340
341 // Obtain gpu commands from metrics library.
342 const bool result =
343 isInitialized() && (api.CommandBufferGet(&commandBuffer) == StatusCode::Success);
344 DEBUG_BREAK_IF(!result);
345 return result;
346 }
347
348 ConfigurationHandle_1_0
createConfiguration(const zet_metric_group_handle_t metricGroupHandle,const zet_metric_group_properties_t properties)349 MetricsLibrary::createConfiguration(const zet_metric_group_handle_t metricGroupHandle,
350 const zet_metric_group_properties_t properties) {
351 // Metric group internal data.
352 auto metricGroup = MetricGroup::fromHandle(metricGroupHandle);
353 auto metricGroupDummy = ConfigurationHandle_1_0{};
354 DEBUG_BREAK_IF(!metricGroup);
355
356 // Metrics library configuration creation data.
357 ConfigurationHandle_1_0 handle = {};
358 ConfigurationCreateData_1_0 handleData = {};
359 handleData.HandleContext = context;
360 handleData.Type = ObjectType::ConfigurationHwCountersOa;
361
362 // Check supported sampling types.
363 const bool validSampling =
364 properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED ||
365 properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED;
366
367 // Activate metric group through metrics discovery to send metric group
368 // configuration to kernel driver.
369 const bool validActivate = isInitialized() && validSampling && metricGroup->activate();
370
371 if (validActivate) {
372 // Use metrics library to create configuration for the activated metric group.
373 api.ConfigurationCreate(&handleData, &handle);
374
375 // Use metrics discovery to deactivate metric group.
376 metricGroup->deactivate();
377 }
378
379 return validActivate ? handle : metricGroupDummy;
380 }
381
getConfiguration(zet_metric_group_handle_t handle)382 ConfigurationHandle_1_0 MetricsLibrary::getConfiguration(zet_metric_group_handle_t handle) {
383
384 auto iter = configurations.find(handle);
385 auto configuration = (iter != end(configurations)) ? iter->second : addConfiguration(handle);
386
387 DEBUG_BREAK_IF(!configuration.IsValid());
388 return configuration;
389 }
390
addConfiguration(zet_metric_group_handle_t handle)391 ConfigurationHandle_1_0 MetricsLibrary::addConfiguration(zet_metric_group_handle_t handle) {
392 ConfigurationHandle_1_0 libraryHandle = {};
393 DEBUG_BREAK_IF(!handle);
394
395 // Create metrics library configuration.
396 auto metricGroup = MetricGroup::fromHandle(handle);
397 auto properties = MetricGroup::getProperties(handle);
398 auto configuration = createConfiguration(metricGroup, properties);
399
400 // Cache configuration if valid.
401 if (configuration.IsValid()) {
402 libraryHandle = configuration;
403 cacheConfiguration(handle, libraryHandle);
404 }
405
406 DEBUG_BREAK_IF(!libraryHandle.IsValid());
407 return libraryHandle;
408 }
409
deleteAllConfigurations()410 void MetricsLibrary::deleteAllConfigurations() {
411
412 if (api.ConfigurationDelete) {
413 for (auto &configuration : configurations) {
414 if (configuration.second.IsValid()) {
415 api.ConfigurationDelete(configuration.second);
416 }
417 }
418 }
419
420 configurations.clear();
421 }
422
metricQueryPoolCreate(zet_context_handle_t hContext,zet_device_handle_t hDevice,zet_metric_group_handle_t hMetricGroup,const zet_metric_query_pool_desc_t * pDesc,zet_metric_query_pool_handle_t * phMetricQueryPool)423 ze_result_t metricQueryPoolCreate(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup,
424 const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool) {
425
426 auto device = Device::fromHandle(hDevice);
427 auto &metricContext = device->getMetricContext();
428
429 // Metric query cannot be used with streamer simultaneously
430 // (due to oa buffer usage constraints).
431 if (metricContext.getMetricStreamer() != nullptr) {
432 return ZE_RESULT_ERROR_NOT_AVAILABLE;
433 }
434
435 const auto &deviceImp = *static_cast<DeviceImp *>(device);
436 auto metricPoolImp = new MetricQueryPoolImp(device->getMetricContext(), hMetricGroup, *pDesc);
437
438 if (metricContext.isImplicitScalingCapable()) {
439
440 auto emptyMetricGroups = std::vector<zet_metric_group_handle_t>();
441 auto &metricGroups = hMetricGroup
442 ? static_cast<MetricGroupImp *>(MetricGroup::fromHandle(hMetricGroup))->getMetricGroups()
443 : emptyMetricGroups;
444
445 const bool useMetricGroupSubDevice = metricGroups.size() > 0;
446
447 auto &metricPools = metricPoolImp->getMetricQueryPools();
448
449 for (size_t i = 0; i < deviceImp.numSubDevices; ++i) {
450
451 auto &subDevice = deviceImp.subDevices[i];
452 auto &subDeviceMetricContext = subDevice->getMetricContext();
453
454 subDeviceMetricContext.getMetricsLibrary().enableWorkloadPartition();
455
456 zet_metric_group_handle_t metricGroupHandle = useMetricGroupSubDevice
457 ? metricGroups[subDeviceMetricContext.getSubDeviceIndex()]
458 : hMetricGroup;
459
460 auto metricPoolSubdeviceImp = new MetricQueryPoolImp(subDeviceMetricContext, metricGroupHandle, *pDesc);
461
462 // Create metric query pool.
463 if (!metricPoolSubdeviceImp->create()) {
464 metricPoolSubdeviceImp->destroy();
465 metricPoolImp->destroy();
466 metricPoolSubdeviceImp = nullptr;
467 metricPoolImp = nullptr;
468 *phMetricQueryPool = nullptr;
469 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
470 }
471
472 metricPools.push_back(metricPoolSubdeviceImp);
473 }
474
475 } else {
476
477 // Create metric query pool.
478 if (!metricPoolImp->create()) {
479 metricPoolImp->destroy();
480 metricPoolImp = nullptr;
481 *phMetricQueryPool = nullptr;
482 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
483 }
484 }
485
486 // Allocate gpu memory.
487 if (!metricPoolImp->allocateGpuMemory()) {
488 metricPoolImp->destroy();
489 metricPoolImp = nullptr;
490 *phMetricQueryPool = nullptr;
491 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
492 }
493
494 *phMetricQueryPool = metricPoolImp;
495
496 return ZE_RESULT_SUCCESS;
497 }
498
MetricQueryPoolImp(MetricContext & metricContextInput,zet_metric_group_handle_t hEventMetricGroupInput,const zet_metric_query_pool_desc_t & poolDescription)499 MetricQueryPoolImp::MetricQueryPoolImp(MetricContext &metricContextInput,
500 zet_metric_group_handle_t hEventMetricGroupInput,
501 const zet_metric_query_pool_desc_t &poolDescription)
502 : metricContext(metricContextInput), metricsLibrary(metricContext.getMetricsLibrary()),
503 description(poolDescription),
504 hMetricGroup(hEventMetricGroupInput) {}
505
create()506 bool MetricQueryPoolImp::create() {
507 switch (description.type) {
508 case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE:
509 return createMetricQueryPool();
510 case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION:
511 return createSkipExecutionQueryPool();
512 default:
513 DEBUG_BREAK_IF(true);
514 return false;
515 }
516 }
517
destroy()518 ze_result_t MetricQueryPoolImp::destroy() {
519 switch (description.type) {
520 case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE:
521 if (metricQueryPools.size() > 0) {
522 for (auto &metricQueryPool : metricQueryPools) {
523 MetricQueryPool::fromHandle(metricQueryPool)->destroy();
524 }
525 }
526 if (query.IsValid()) {
527 metricsLibrary.destroyMetricQuery(query);
528 }
529 if (pAllocation) {
530 metricContext.getDevice().getDriverHandle()->getMemoryManager()->freeGraphicsMemory(pAllocation);
531 }
532 break;
533 case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION:
534 for (auto &metricQueryPool : metricQueryPools) {
535 MetricQueryPool::fromHandle(metricQueryPool)->destroy();
536 }
537 break;
538 default:
539 DEBUG_BREAK_IF(true);
540 break;
541 }
542
543 // Check open queries.
544 if (metricContext.getMetricsLibrary().getMetricQueryCount() == 0) {
545 if (!metricContext.isMetricGroupActivated()) {
546 metricContext.getMetricsLibrary().release();
547 }
548 }
549
550 delete this;
551
552 return ZE_RESULT_SUCCESS;
553 }
554
allocateGpuMemory()555 bool MetricQueryPoolImp::allocateGpuMemory() {
556
557 if (description.type == ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE) {
558 // Get allocation size.
559 const auto &deviceImp = *static_cast<DeviceImp *>(&metricContext.getDevice());
560 allocationSize = (metricContext.isImplicitScalingCapable())
561 ? deviceImp.subDevices[0]->getMetricContext().getMetricsLibrary().getQueryReportGpuSize() * description.count * deviceImp.numSubDevices
562 : metricsLibrary.getQueryReportGpuSize() * description.count;
563
564 if (allocationSize == 0) {
565 return false;
566 }
567
568 // Allocate gpu memory.
569 NEO::AllocationProperties properties(
570 metricContext.getDevice().getRootDeviceIndex(), allocationSize, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, metricContext.getDevice().getNEODevice()->getDeviceBitfield());
571 properties.alignment = 64u;
572 pAllocation = metricContext.getDevice().getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
573
574 UNRECOVERABLE_IF(pAllocation == nullptr);
575
576 // Clear allocation.
577 memset(pAllocation->getUnderlyingBuffer(), 0, allocationSize);
578 }
579 return true;
580 }
581
createMetricQueryPool()582 bool MetricQueryPoolImp::createMetricQueryPool() {
583 // Validate metric group query - only event based is supported.
584 auto metricGroupProperites = MetricGroup::getProperties(hMetricGroup);
585 const bool validMetricGroup = metricGroupProperites.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED;
586
587 if (!validMetricGroup) {
588 return false;
589 }
590
591 // Pool initialization.
592 pool.reserve(description.count);
593 for (uint32_t i = 0; i < description.count; ++i) {
594 pool.push_back({metricContext, *this, i});
595 }
596
597 // Metrics library query object initialization.
598 return metricsLibrary.createMetricQuery(description.count, query, pAllocation);
599 }
600
createSkipExecutionQueryPool()601 bool MetricQueryPoolImp::createSkipExecutionQueryPool() {
602
603 pool.reserve(description.count);
604 for (uint32_t i = 0; i < description.count; ++i) {
605 pool.push_back({metricContext, *this, i});
606 }
607
608 return true;
609 }
610
fromHandle(zet_metric_query_pool_handle_t handle)611 MetricQueryPool *MetricQueryPool::fromHandle(zet_metric_query_pool_handle_t handle) {
612 return static_cast<MetricQueryPool *>(handle);
613 }
614
toHandle()615 zet_metric_query_pool_handle_t MetricQueryPool::toHandle() { return this; }
616
createMetricQuery(uint32_t index,zet_metric_query_handle_t * phMetricQuery)617 ze_result_t MetricQueryPoolImp::createMetricQuery(uint32_t index,
618 zet_metric_query_handle_t *phMetricQuery) {
619
620 if (index >= description.count) {
621 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
622 }
623
624 if (metricQueryPools.size() > 0) {
625
626 auto pMetricQueryImp = new MetricQueryImp(metricContext, *this, index);
627
628 for (auto metricQueryPoolHandle : metricQueryPools) {
629 auto &metricQueries = pMetricQueryImp->getMetricQueries();
630 auto metricQueryPoolImp = static_cast<MetricQueryPoolImp *>(MetricQueryPool::fromHandle(metricQueryPoolHandle));
631 metricQueries.push_back(&metricQueryPoolImp->pool[index]);
632 }
633
634 *phMetricQuery = pMetricQueryImp;
635
636 return ZE_RESULT_SUCCESS;
637
638 } else {
639
640 *phMetricQuery = &(pool[index]);
641
642 return ZE_RESULT_SUCCESS;
643 }
644 }
645
getMetricQueryPools()646 std::vector<zet_metric_query_pool_handle_t> &MetricQueryPoolImp::getMetricQueryPools() {
647 return metricQueryPools;
648 }
649
MetricQueryImp(MetricContext & metricContextInput,MetricQueryPoolImp & poolInput,const uint32_t slotInput)650 MetricQueryImp::MetricQueryImp(MetricContext &metricContextInput, MetricQueryPoolImp &poolInput,
651 const uint32_t slotInput)
652 : metricContext(metricContextInput), metricsLibrary(metricContext.getMetricsLibrary()),
653 pool(poolInput), slot(slotInput) {}
654
appendBegin(CommandList & commandList)655 ze_result_t MetricQueryImp::appendBegin(CommandList &commandList) {
656 switch (pool.description.type) {
657 case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE:
658 return writeMetricQuery(commandList, nullptr, 0, nullptr, true);
659 case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION:
660 return writeSkipExecutionQuery(commandList, nullptr, 0, nullptr, true);
661 default:
662 DEBUG_BREAK_IF(true);
663 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
664 }
665 }
666
appendEnd(CommandList & commandList,ze_event_handle_t hSignalEvent,uint32_t numWaitEvents,ze_event_handle_t * phWaitEvents)667 ze_result_t MetricQueryImp::appendEnd(CommandList &commandList, ze_event_handle_t hSignalEvent,
668 uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
669 switch (pool.description.type) {
670 case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE:
671 return writeMetricQuery(commandList, hSignalEvent, numWaitEvents, phWaitEvents, false);
672 case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION:
673 return writeSkipExecutionQuery(commandList, hSignalEvent, numWaitEvents, phWaitEvents, false);
674 default:
675 DEBUG_BREAK_IF(true);
676 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
677 }
678 }
679
getData(size_t * pRawDataSize,uint8_t * pRawData)680 ze_result_t MetricQueryImp::getData(size_t *pRawDataSize, uint8_t *pRawData) {
681
682 const bool calculateSizeOnly = *pRawDataSize == 0;
683 const size_t metricQueriesSize = metricQueries.size();
684 bool result = true;
685
686 if (metricQueriesSize > 0) {
687
688 if (calculateSizeOnly) {
689
690 const size_t headerSize = sizeof(MetricGroupCalculateHeader);
691 const size_t rawDataOffsetsRequiredSize = sizeof(uint32_t) * metricQueriesSize;
692 const size_t rawDataSizesRequiredSize = sizeof(uint32_t) * metricQueriesSize;
693
694 auto pMetricQueryImp = static_cast<MetricQueryImp *>(MetricQuery::fromHandle(metricQueries[0]));
695 result = pMetricQueryImp->metricsLibrary.getMetricQueryReportSize(*pRawDataSize);
696
697 const size_t rawDataRequiredSize = *pRawDataSize * metricQueriesSize;
698
699 *pRawDataSize = headerSize + rawDataOffsetsRequiredSize + rawDataSizesRequiredSize + rawDataRequiredSize;
700
701 } else {
702
703 MetricGroupCalculateHeader *pRawDataHeader = reinterpret_cast<MetricGroupCalculateHeader *>(pRawData);
704 pRawDataHeader->magic = MetricGroupCalculateHeader::magicValue;
705 pRawDataHeader->dataCount = static_cast<uint32_t>(metricQueriesSize);
706
707 // Relative offsets in the header allow to move/copy the buffer.
708 pRawDataHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader);
709 pRawDataHeader->rawDataSizes = static_cast<uint32_t>(pRawDataHeader->rawDataOffsets + (sizeof(uint32_t) * metricQueriesSize));
710 pRawDataHeader->rawDataOffset = static_cast<uint32_t>(pRawDataHeader->rawDataSizes + (sizeof(uint32_t) * metricQueriesSize));
711
712 const size_t sizePerSubDevice = (*pRawDataSize - pRawDataHeader->rawDataOffset) / metricQueriesSize;
713 DEBUG_BREAK_IF(sizePerSubDevice == 0);
714 *pRawDataSize = pRawDataHeader->rawDataOffset;
715
716 uint32_t *pRawDataOffsetsUnpacked = reinterpret_cast<uint32_t *>(pRawData + pRawDataHeader->rawDataOffsets);
717 uint32_t *pRawDataSizesUnpacked = reinterpret_cast<uint32_t *>(pRawData + pRawDataHeader->rawDataSizes);
718 uint8_t *pRawDataUnpacked = reinterpret_cast<uint8_t *>(pRawData + pRawDataHeader->rawDataOffset);
719
720 for (size_t i = 0; i < metricQueriesSize; ++i) {
721
722 size_t getDataSize = sizePerSubDevice;
723 const uint32_t rawDataOffset = (i != 0) ? (pRawDataSizesUnpacked[i - 1] + pRawDataOffsetsUnpacked[i - 1]) : 0;
724 auto pMetricQuery = MetricQuery::fromHandle(metricQueries[i]);
725 ze_result_t tmpResult = pMetricQuery->getData(&getDataSize, pRawDataUnpacked + rawDataOffset);
726 // Return at first error.
727 if (tmpResult != ZE_RESULT_SUCCESS) {
728 return tmpResult;
729 }
730 pRawDataSizesUnpacked[i] = static_cast<uint32_t>(getDataSize);
731 pRawDataOffsetsUnpacked[i] = (i != 0) ? pRawDataOffsetsUnpacked[i - 1] + pRawDataSizesUnpacked[i] : 0;
732 *pRawDataSize += getDataSize;
733 }
734 }
735
736 } else {
737 result = calculateSizeOnly
738 ? metricsLibrary.getMetricQueryReportSize(*pRawDataSize)
739 : metricsLibrary.getMetricQueryReport(pool.query, slot, *pRawDataSize, pRawData);
740 }
741
742 return result
743 ? ZE_RESULT_SUCCESS
744 : ZE_RESULT_ERROR_UNKNOWN;
745 }
746
reset()747 ze_result_t MetricQueryImp::reset() {
748 return ZE_RESULT_SUCCESS;
749 }
750
destroy()751 ze_result_t MetricQueryImp::destroy() {
752
753 if (metricQueries.size() > 0) {
754 delete this;
755 }
756
757 return ZE_RESULT_SUCCESS;
758 }
759
getMetricQueries()760 std::vector<zet_metric_query_handle_t> &MetricQueryImp::getMetricQueries() {
761 return metricQueries;
762 }
763
writeMetricQuery(CommandList & commandList,ze_event_handle_t hSignalEvent,uint32_t numWaitEvents,ze_event_handle_t * phWaitEvents,const bool begin)764 ze_result_t MetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_handle_t hSignalEvent,
765 uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
766 const bool begin) {
767
768 bool result = true;
769 const bool writeCompletionEvent = hSignalEvent && !begin;
770 const size_t metricQueriesSize = metricQueries.size();
771
772 // Make gpu allocation visible.
773 commandList.commandContainer.addToResidencyContainer(pool.pAllocation);
774
775 // Wait for events before executing query.
776 commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents);
777
778 if (metricQueriesSize) {
779
780 const size_t allocationSizeForSubDevice = pool.allocationSize / metricQueriesSize;
781 static_cast<CommandListImp &>(commandList).appendMultiPartitionPrologue(static_cast<uint32_t>(allocationSizeForSubDevice));
782 void *buffer = nullptr;
783 bool gpuCommandStatus = true;
784
785 // Revert iteration to be ensured that the last set of gpu commands overwrite the previous written sets of gpu commands,
786 // so only one of the sub-device contexts will be used to append to command list.
787 for (int32_t i = static_cast<int32_t>(metricQueriesSize - 1); i >= 0; --i) {
788
789 // Adjust cpu and gpu addresses for each sub-device's query object.
790 uint64_t gpuAddress = pool.pAllocation->getGpuAddress() + (i * allocationSizeForSubDevice);
791 uint8_t *cpuAddress = static_cast<uint8_t *>(pool.pAllocation->getUnderlyingBuffer()) + (i * allocationSizeForSubDevice);
792
793 auto &metricQueryImp = *static_cast<MetricQueryImp *>(MetricQuery::fromHandle(metricQueries[i]));
794 auto &metricLibrarySubDevice = metricQueryImp.metricsLibrary;
795 auto &metricContextSubDevice = metricQueryImp.metricContext;
796
797 // Obtain gpu commands.
798 CommandBufferData_1_0 commandBuffer = {};
799 commandBuffer.CommandsType = ObjectType::QueryHwCounters;
800 commandBuffer.QueryHwCounters.Handle = metricQueryImp.pool.query;
801 commandBuffer.QueryHwCounters.Begin = begin;
802 commandBuffer.QueryHwCounters.Slot = slot;
803 commandBuffer.Allocation.GpuAddress = gpuAddress;
804 commandBuffer.Allocation.CpuAddress = cpuAddress;
805 commandBuffer.Type = metricContextSubDevice.isComputeUsed()
806 ? GpuCommandBufferType::Compute
807 : GpuCommandBufferType::Render;
808
809 // Obtain required command buffer size.
810 commandBuffer.Size = metricLibrarySubDevice.getGpuCommandsSize(commandBuffer);
811
812 // Validate gpu commands size.
813 if (!commandBuffer.Size) {
814 return ZE_RESULT_ERROR_UNKNOWN;
815 }
816
817 // Allocate command buffer only once.
818 if (buffer == nullptr) {
819 auto stream = commandList.commandContainer.getCommandStream();
820 buffer = stream->getSpace(commandBuffer.Size);
821 }
822
823 // Fill attached command buffer with gpu commands.
824 commandBuffer.Data = buffer;
825
826 // Obtain gpu commands from metrics library for each sub-device to update cpu and gpu addresses for
827 // each query object in metrics library, so that get data works properly.
828 gpuCommandStatus = metricLibrarySubDevice.getGpuCommands(commandBuffer);
829 if (!gpuCommandStatus) {
830 break;
831 }
832 }
833 static_cast<CommandListImp &>(commandList).appendMultiPartitionEpilogue();
834 if (!gpuCommandStatus) {
835 return ZE_RESULT_ERROR_UNKNOWN;
836 }
837
838 // Write gpu commands for sub device index 0.
839 } else {
840 // Obtain gpu commands.
841 CommandBufferData_1_0 commandBuffer = {};
842 commandBuffer.CommandsType = ObjectType::QueryHwCounters;
843 commandBuffer.QueryHwCounters.Handle = pool.query;
844 commandBuffer.QueryHwCounters.Begin = begin;
845 commandBuffer.QueryHwCounters.Slot = slot;
846 commandBuffer.Allocation.GpuAddress = pool.pAllocation->getGpuAddress();
847 commandBuffer.Allocation.CpuAddress = pool.pAllocation->getUnderlyingBuffer();
848 commandBuffer.Type = metricContext.isComputeUsed()
849 ? GpuCommandBufferType::Compute
850 : GpuCommandBufferType::Render;
851
852 // Get query commands.
853 result = metricsLibrary.getGpuCommands(commandList, commandBuffer);
854 }
855
856 // Write completion event.
857 if (result && writeCompletionEvent) {
858 result = commandList.appendSignalEvent(hSignalEvent) == ZE_RESULT_SUCCESS;
859 }
860
861 return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
862 }
863
writeSkipExecutionQuery(CommandList & commandList,ze_event_handle_t hSignalEvent,uint32_t numWaitEvents,ze_event_handle_t * phWaitEvents,const bool begin)864 ze_result_t MetricQueryImp::writeSkipExecutionQuery(CommandList &commandList, ze_event_handle_t hSignalEvent,
865 uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
866 const bool begin) {
867
868 bool writeCompletionEvent = hSignalEvent && !begin;
869 bool result = false;
870
871 // Obtain gpu commands.
872 CommandBufferData_1_0 commandBuffer = {};
873 commandBuffer.CommandsType = ObjectType::OverrideNullHardware;
874 commandBuffer.Override.Enable = begin;
875 commandBuffer.Type = metricContext.isComputeUsed()
876 ? GpuCommandBufferType::Compute
877 : GpuCommandBufferType::Render;
878
879 // Wait for events before executing query.
880 zeCommandListAppendWaitOnEvents(commandList.toHandle(), numWaitEvents, phWaitEvents);
881
882 // Get query commands.
883 result = metricsLibrary.getGpuCommands(commandList, commandBuffer);
884
885 // Write completion event.
886 if (result && writeCompletionEvent) {
887 result = zeCommandListAppendSignalEvent(commandList.toHandle(), hSignalEvent) ==
888 ZE_RESULT_SUCCESS;
889 }
890
891 return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
892 }
893
appendMemoryBarrier(CommandList & commandList)894 ze_result_t MetricQuery::appendMemoryBarrier(CommandList &commandList) {
895
896 DeviceImp *pDeviceImp = static_cast<DeviceImp *>(commandList.device);
897
898 if (pDeviceImp->metricContext->isImplicitScalingCapable()) {
899 // Use one of the sub-device contexts to append to command list.
900 pDeviceImp = static_cast<DeviceImp *>(pDeviceImp->subDevices[0]);
901 }
902
903 auto &metricContext = pDeviceImp->getMetricContext();
904 auto &metricsLibrary = metricContext.getMetricsLibrary();
905
906 // Obtain gpu commands.
907 CommandBufferData_1_0 commandBuffer = {};
908 commandBuffer.CommandsType = ObjectType::OverrideFlushCaches;
909 commandBuffer.Override.Enable = true;
910 commandBuffer.Type = metricContext.isComputeUsed()
911 ? GpuCommandBufferType::Compute
912 : GpuCommandBufferType::Render;
913
914 return metricsLibrary.getGpuCommands(commandList, commandBuffer) ? ZE_RESULT_SUCCESS
915 : ZE_RESULT_ERROR_UNKNOWN;
916 }
917
appendStreamerMarker(CommandList & commandList,zet_metric_streamer_handle_t hMetricStreamer,uint32_t value)918 ze_result_t MetricQuery::appendStreamerMarker(CommandList &commandList,
919 zet_metric_streamer_handle_t hMetricStreamer,
920 uint32_t value) {
921
922 DeviceImp *pDeviceImp = static_cast<DeviceImp *>(commandList.device);
923
924 if (pDeviceImp->metricContext->isImplicitScalingCapable()) {
925 // Use one of the sub-device contexts to append to command list.
926 pDeviceImp = static_cast<DeviceImp *>(pDeviceImp->subDevices[0]);
927 pDeviceImp->metricContext->getMetricsLibrary().enableWorkloadPartition();
928 }
929 auto &metricContext = pDeviceImp->getMetricContext();
930 auto &metricsLibrary = metricContext.getMetricsLibrary();
931
932 const uint32_t streamerMarkerHighBitsShift = 25;
933
934 // Obtain gpu commands.
935 CommandBufferData_1_0 commandBuffer = {};
936 commandBuffer.CommandsType = ObjectType::MarkerStreamUser;
937 commandBuffer.MarkerStreamUser.Value = value;
938 commandBuffer.MarkerStreamUser.Reserved = (value >> streamerMarkerHighBitsShift);
939 commandBuffer.Type = metricContext.isComputeUsed()
940 ? GpuCommandBufferType::Compute
941 : GpuCommandBufferType::Render;
942
943 return metricsLibrary.getGpuCommands(commandList, commandBuffer) ? ZE_RESULT_SUCCESS
944 : ZE_RESULT_ERROR_UNKNOWN;
945 }
946
fromHandle(zet_metric_query_handle_t handle)947 MetricQuery *MetricQuery::fromHandle(zet_metric_query_handle_t handle) {
948 return static_cast<MetricQuery *>(handle);
949 }
950
toHandle()951 zet_metric_query_handle_t MetricQuery::toHandle() { return this; }
952
953 } // namespace L0
954