1 /*
2 * Copyright (C) 2020-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "level_zero/tools/source/metrics/metric.h"
9
10 #include "shared/source/os_interface/os_library.h"
11
12 #include "level_zero/core/source/device/device_imp.h"
13 #include "level_zero/core/source/driver/driver.h"
14 #include "level_zero/core/source/driver/driver_handle_imp.h"
15 #include "level_zero/source/inc/ze_intel_gpu.h"
16 #include "level_zero/tools/source/metrics/metric_enumeration_imp.h"
17 #include "level_zero/tools/source/metrics/metric_query_imp.h"
18
19 #include <map>
20 #include <utility>
21
22 namespace L0 {
23
24 struct MetricGroupDomains {
25
26 public:
27 MetricGroupDomains(MetricContext &metricContext);
28 ze_result_t activateDeferred(const uint32_t subDeviceIndex, const uint32_t count, zet_metric_group_handle_t *phMetricGroups);
29 ze_result_t activate();
30 ze_result_t deactivate();
31 bool isActivated(const zet_metric_group_handle_t hMetricGroup);
32 uint32_t getActivatedCount();
33
34 protected:
35 bool activateMetricGroupDeferred(const zet_metric_group_handle_t hMetricGroup);
36 bool activateEventMetricGroup(const zet_metric_group_handle_t hMetricGroup);
37
38 protected:
39 MetricContext &metricContext;
40
41 // Map holds activated domains and associated metric groups.
42 // Content: <domain number, pair<metric group, is activated on gpu flag>
43 std::map<uint32_t, std::pair<zet_metric_group_handle_t, bool>> domains;
44 };
45
46 struct MetricContextImp : public MetricContext {
47 public:
48 MetricContextImp(Device &device);
49 ~MetricContextImp() override;
50
51 bool loadDependencies() override;
52 bool isInitialized() override;
53 void setInitializationState(const ze_result_t state) override;
54 Device &getDevice() override;
55 MetricsLibrary &getMetricsLibrary() override;
56 MetricEnumeration &getMetricEnumeration() override;
57 MetricStreamer *getMetricStreamer() override;
58 void setMetricStreamer(MetricStreamer *pMetricStreamer) override;
59 void setMetricsLibrary(MetricsLibrary &metricsLibrary) override;
60 void setMetricEnumeration(MetricEnumeration &metricEnumeration) override;
61
62 ze_result_t activateMetricGroups() override;
63 ze_result_t activateMetricGroupsDeferred(const uint32_t count,
64 zet_metric_group_handle_t *phMetricGroups) override;
65 bool isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) override;
66 bool isMetricGroupActivated() override;
67
68 void setUseCompute(const bool useCompute) override;
69 bool isComputeUsed() override;
70 uint32_t getSubDeviceIndex() override;
71 void setSubDeviceIndex(const uint32_t index) override;
72 bool isImplicitScalingCapable() override;
73
74 protected:
75 ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED;
76 struct Device &device;
77 std::unique_ptr<MetricEnumeration> metricEnumeration = nullptr;
78 std::unique_ptr<MetricsLibrary> metricsLibrary = nullptr;
79 MetricGroupDomains metricGroupDomains;
80 MetricStreamer *pMetricStreamer = nullptr;
81 uint32_t subDeviceIndex = 0;
82 bool useCompute = false;
83 bool implicitScalingCapable = false;
84 };
85
MetricContextImp(Device & deviceInput)86 MetricContextImp::MetricContextImp(Device &deviceInput)
87 : device(deviceInput),
88 metricEnumeration(std::unique_ptr<MetricEnumeration>(new (std::nothrow) MetricEnumeration(*this))),
89 metricsLibrary(std::unique_ptr<MetricsLibrary>(new (std::nothrow) MetricsLibrary(*this))),
90 metricGroupDomains(*this) {
91
92 auto deviceNeo = deviceInput.getNEODevice();
93 bool isSubDevice = deviceNeo->isSubDevice();
94
95 subDeviceIndex = isSubDevice
96 ? static_cast<NEO::SubDevice *>(deviceNeo)->getSubDeviceIndex()
97 : 0;
98
99 implicitScalingCapable = !isSubDevice && device.isImplicitScalingCapable();
100 }
101
~MetricContextImp()102 MetricContextImp::~MetricContextImp() {
103 metricsLibrary.reset();
104 metricEnumeration.reset();
105 }
106
loadDependencies()107 bool MetricContextImp::loadDependencies() {
108 bool result = true;
109 if (metricEnumeration->loadMetricsDiscovery() != ZE_RESULT_SUCCESS) {
110 result = false;
111 DEBUG_BREAK_IF(!result);
112 }
113 if (result && !metricsLibrary->load()) {
114 result = false;
115 DEBUG_BREAK_IF(!result);
116 }
117
118 // Set metric context initialization state.
119 setInitializationState(result
120 ? ZE_RESULT_SUCCESS
121 : ZE_RESULT_ERROR_UNKNOWN);
122
123 return result;
124 }
125
isInitialized()126 bool MetricContextImp::isInitialized() {
127 return initializationState == ZE_RESULT_SUCCESS;
128 }
129
setInitializationState(const ze_result_t state)130 void MetricContextImp::setInitializationState(const ze_result_t state) {
131 initializationState = state;
132 }
133
getDevice()134 Device &MetricContextImp::getDevice() { return device; }
135
getMetricsLibrary()136 MetricsLibrary &MetricContextImp::getMetricsLibrary() { return *metricsLibrary; }
137
getMetricEnumeration()138 MetricEnumeration &MetricContextImp::getMetricEnumeration() { return *metricEnumeration; }
139
getMetricStreamer()140 MetricStreamer *MetricContextImp::getMetricStreamer() { return pMetricStreamer; }
141
setMetricStreamer(MetricStreamer * pMetricStreamer)142 void MetricContextImp::setMetricStreamer(MetricStreamer *pMetricStreamer) {
143 this->pMetricStreamer = pMetricStreamer;
144 }
145
setMetricsLibrary(MetricsLibrary & metricsLibrary)146 void MetricContextImp::setMetricsLibrary(MetricsLibrary &metricsLibrary) {
147 this->metricsLibrary.release();
148 this->metricsLibrary.reset(&metricsLibrary);
149 }
150
setMetricEnumeration(MetricEnumeration & metricEnumeration)151 void MetricContextImp::setMetricEnumeration(MetricEnumeration &metricEnumeration) {
152 this->metricEnumeration.release();
153 this->metricEnumeration.reset(&metricEnumeration);
154 }
155
setUseCompute(const bool useCompute)156 void MetricContextImp::setUseCompute(const bool useCompute) {
157 this->useCompute = useCompute;
158 }
159
isComputeUsed()160 bool MetricContextImp::isComputeUsed() {
161 return useCompute;
162 }
163
getSubDeviceIndex()164 uint32_t MetricContextImp::getSubDeviceIndex() {
165 return subDeviceIndex;
166 }
167
setSubDeviceIndex(const uint32_t index)168 void MetricContextImp::setSubDeviceIndex(const uint32_t index) {
169 subDeviceIndex = index;
170 }
171
isImplicitScalingCapable()172 bool MetricContextImp::isImplicitScalingCapable() {
173 return implicitScalingCapable;
174 }
175
176 ze_result_t
activateMetricGroupsDeferred(const uint32_t count,zet_metric_group_handle_t * phMetricGroups)177 MetricContextImp::activateMetricGroupsDeferred(const uint32_t count,
178 zet_metric_group_handle_t *phMetricGroups) {
179
180 // Activation: postpone until zetMetricStreamerOpen or zeCommandQueueExecuteCommandLists
181 // Deactivation: execute immediately.
182 return phMetricGroups ? metricGroupDomains.activateDeferred(subDeviceIndex, count, phMetricGroups)
183 : metricGroupDomains.deactivate();
184 }
185
isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup)186 bool MetricContextImp::isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) {
187 return metricGroupDomains.isActivated(hMetricGroup);
188 }
189
isMetricGroupActivated()190 bool MetricContextImp::isMetricGroupActivated() {
191 return metricGroupDomains.getActivatedCount() > 0;
192 }
193
activateMetricGroups()194 ze_result_t MetricContextImp::activateMetricGroups() { return metricGroupDomains.activate(); }
195
enableMetricApi()196 ze_result_t MetricContext::enableMetricApi() {
197
198 if (!isMetricApiAvailable()) {
199 return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
200 }
201
202 bool failed = false;
203
204 auto driverHandle = L0::DriverHandle::fromHandle(GlobalDriverHandle);
205 auto rootDevices = std::vector<ze_device_handle_t>();
206 auto subDevices = std::vector<ze_device_handle_t>();
207
208 // Obtain root devices.
209 uint32_t rootDeviceCount = 0;
210 driverHandle->getDevice(&rootDeviceCount, nullptr);
211 rootDevices.resize(rootDeviceCount);
212 driverHandle->getDevice(&rootDeviceCount, rootDevices.data());
213
214 for (auto rootDeviceHandle : rootDevices) {
215
216 // Initialize root device.
217 auto rootDevice = static_cast<DeviceImp *>(L0::Device::fromHandle(rootDeviceHandle));
218 failed |= !rootDevice->metricContext->loadDependencies();
219
220 // Initialize sub devices.
221 for (uint32_t i = 0; i < rootDevice->numSubDevices; ++i) {
222 failed |= !rootDevice->subDevices[i]->getMetricContext().loadDependencies();
223 }
224 }
225
226 return failed
227 ? ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE
228 : ZE_RESULT_SUCCESS;
229 }
230
create(Device & device)231 std::unique_ptr<MetricContext> MetricContext::create(Device &device) {
232 auto metricContextImp = new (std::nothrow) MetricContextImp(device);
233 std::unique_ptr<MetricContext> metricContext{metricContextImp};
234 return metricContext;
235 }
236
isMetricApiAvailable()237 bool MetricContext::isMetricApiAvailable() {
238
239 std::unique_ptr<NEO::OsLibrary> library = nullptr;
240
241 // Check Metrics Discovery availability.
242 library.reset(NEO::OsLibrary::load(MetricEnumeration::getMetricsDiscoveryFilename()));
243 if (library == nullptr) {
244 PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Unable to find metrics discovery %s\n", MetricEnumeration::getMetricsDiscoveryFilename());
245 return false;
246 }
247
248 // Check Metrics Library availability.
249 library.reset(NEO::OsLibrary::load(MetricsLibrary::getFilename()));
250 if (library == nullptr) {
251 PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Unable to find metrics library %s\n", MetricsLibrary::getFilename());
252 return false;
253 }
254
255 return true;
256 }
257
MetricGroupDomains(MetricContext & metricContext)258 MetricGroupDomains::MetricGroupDomains(MetricContext &metricContext)
259 : metricContext(metricContext) {}
260
activateDeferred(const uint32_t subDeviceIndex,const uint32_t count,zet_metric_group_handle_t * phMetricGroups)261 ze_result_t MetricGroupDomains::activateDeferred(const uint32_t subDeviceIndex,
262 const uint32_t count,
263 zet_metric_group_handle_t *phMetricGroups) {
264 // For each metric group:
265 for (uint32_t i = 0; i < count; ++i) {
266 DEBUG_BREAK_IF(!phMetricGroups[i]);
267
268 zet_metric_group_handle_t handle = phMetricGroups[i];
269 auto pMetricGroupImp = static_cast<MetricGroupImp *>(MetricGroup::fromHandle(handle));
270 if (pMetricGroupImp->getMetricGroups().size() > 0) {
271 handle = pMetricGroupImp->getMetricGroups()[subDeviceIndex];
272 }
273
274 // Try to associate it with a domain (oa, ...).
275 if (!activateMetricGroupDeferred(handle)) {
276 return ZE_RESULT_ERROR_UNKNOWN;
277 }
278 }
279 return ZE_RESULT_SUCCESS;
280 }
281
activateMetricGroupDeferred(const zet_metric_group_handle_t hMetricGroup)282 bool MetricGroupDomains::activateMetricGroupDeferred(const zet_metric_group_handle_t hMetricGroup) {
283
284 const auto properites = MetricGroup::getProperties(hMetricGroup);
285 const auto domain = properites.domain;
286
287 const bool isDomainFree = domains[domain].first == nullptr;
288 const bool isSameGroup = domains[domain].first == hMetricGroup;
289
290 // The same metric group has been already associated.
291 if (isSameGroup) {
292 return true;
293 }
294
295 // Domain has been already associated with a different metric group.
296 if (!isDomainFree) {
297 return false;
298 }
299
300 // Associate metric group with domain and mark it as not active.
301 // Activation will be performed during zeCommandQueueExecuteCommandLists (query)
302 // or zetMetricStreamerOpen (time based sampling).
303 domains[domain].first = hMetricGroup;
304 domains[domain].second = false;
305
306 return true;
307 }
308
activate()309 ze_result_t MetricGroupDomains::activate() {
310
311 // For each domain.
312 for (auto &domain : domains) {
313
314 auto hMetricGroup = domain.second.first;
315 bool &metricGroupActive = domain.second.second;
316 bool metricGroupEventBased =
317 hMetricGroup && MetricGroup::getProperties(hMetricGroup).samplingType ==
318 ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED;
319
320 // Activate only event based metric groups.
321 // Time based metric group will be activated during zetMetricStreamerOpen.
322 if (metricGroupEventBased && !metricGroupActive) {
323
324 metricGroupActive = activateEventMetricGroup(hMetricGroup);
325
326 if (metricGroupActive == false) {
327 DEBUG_BREAK_IF(true);
328 return ZE_RESULT_ERROR_UNKNOWN;
329 }
330 }
331 }
332
333 return ZE_RESULT_SUCCESS;
334 }
335
activateEventMetricGroup(const zet_metric_group_handle_t hMetricGroup)336 bool MetricGroupDomains::activateEventMetricGroup(const zet_metric_group_handle_t hMetricGroup) {
337 // Obtain metric group configuration handle from metrics library.
338 auto hConfiguration = metricContext.getMetricsLibrary().getConfiguration(hMetricGroup);
339
340 // Validate metrics library handle.
341 if (!hConfiguration.IsValid()) {
342 DEBUG_BREAK_IF(true);
343 return false;
344 }
345
346 // Write metric group configuration to gpu.
347 const bool result = metricContext.getMetricsLibrary().activateConfiguration(hConfiguration);
348
349 DEBUG_BREAK_IF(!result);
350 return result;
351 }
352
deactivate()353 ze_result_t MetricGroupDomains::deactivate() {
354 // Deactivate metric group for each domain.
355 for (auto &domain : domains) {
356
357 auto hMetricGroup = domain.second.first;
358 bool metricGroupActivatedOnGpu = domain.second.second;
359
360 if (metricGroupActivatedOnGpu) {
361 // Only event based metric groups are activated on Gpu.
362 DEBUG_BREAK_IF(MetricGroup::getProperties(hMetricGroup).samplingType != ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED);
363 auto hConfiguration = metricContext.getMetricsLibrary().getConfiguration(hMetricGroup);
364 // Deactivate metric group configuration using metrics library.
365 metricContext.getMetricsLibrary().deactivateConfiguration(hConfiguration);
366 }
367 // Mark domain as free.
368 domain.second = {};
369 }
370
371 // Check any open queries.
372 if (metricContext.getMetricsLibrary().getMetricQueryCount() == 0) {
373 if (metricContext.getMetricsLibrary().getInitializationState() != ZE_RESULT_ERROR_UNINITIALIZED) {
374 metricContext.getMetricsLibrary().release();
375 }
376 }
377
378 return ZE_RESULT_SUCCESS;
379 }
380
isActivated(const zet_metric_group_handle_t hMetricGroup)381 bool MetricGroupDomains::isActivated(const zet_metric_group_handle_t hMetricGroup) {
382 auto metricGroupProperties = MetricGroup::getProperties(hMetricGroup);
383
384 // 1. Check whether domain is activated.
385 const auto domain = domains.find(metricGroupProperties.domain);
386 if (domain == domains.end()) {
387 return false;
388 }
389
390 // 2. Check whether the specific MetricGroup is activated.
391 return domain->second.first == hMetricGroup;
392 }
393
getActivatedCount()394 uint32_t MetricGroupDomains::getActivatedCount() {
395 uint32_t count = 0;
396 for (const auto &domain : domains) {
397 count += domain.second.second ? 1 : 0;
398 }
399 return count;
400 }
401
metricGroupGet(zet_device_handle_t hDevice,uint32_t * pCount,zet_metric_group_handle_t * phMetricGroups)402 ze_result_t metricGroupGet(zet_device_handle_t hDevice, uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) {
403 auto device = Device::fromHandle(hDevice);
404 return device->getMetricContext().getMetricEnumeration().metricGroupGet(*pCount,
405 phMetricGroups);
406 }
407
metricStreamerOpen(zet_context_handle_t hContext,zet_device_handle_t hDevice,zet_metric_group_handle_t hMetricGroup,zet_metric_streamer_desc_t * pDesc,ze_event_handle_t hNotificationEvent,zet_metric_streamer_handle_t * phMetricStreamer)408 ze_result_t metricStreamerOpen(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup,
409 zet_metric_streamer_desc_t *pDesc, ze_event_handle_t hNotificationEvent,
410 zet_metric_streamer_handle_t *phMetricStreamer) {
411
412 return MetricStreamer::open(hContext, hDevice, hMetricGroup, *pDesc, hNotificationEvent, phMetricStreamer);
413 }
414
415 } // namespace L0
416