1 /*
2  * Copyright (C) 2020-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "level_zero/tools/source/metrics/metric.h"
9 
10 #include "shared/source/os_interface/os_library.h"
11 
12 #include "level_zero/core/source/device/device_imp.h"
13 #include "level_zero/core/source/driver/driver.h"
14 #include "level_zero/core/source/driver/driver_handle_imp.h"
15 #include "level_zero/source/inc/ze_intel_gpu.h"
16 #include "level_zero/tools/source/metrics/metric_enumeration_imp.h"
17 #include "level_zero/tools/source/metrics/metric_query_imp.h"
18 
19 #include <map>
20 #include <utility>
21 
22 namespace L0 {
23 
24 struct MetricGroupDomains {
25 
26   public:
27     MetricGroupDomains(MetricContext &metricContext);
28     ze_result_t activateDeferred(const uint32_t subDeviceIndex, const uint32_t count, zet_metric_group_handle_t *phMetricGroups);
29     ze_result_t activate();
30     ze_result_t deactivate();
31     bool isActivated(const zet_metric_group_handle_t hMetricGroup);
32     uint32_t getActivatedCount();
33 
34   protected:
35     bool activateMetricGroupDeferred(const zet_metric_group_handle_t hMetricGroup);
36     bool activateEventMetricGroup(const zet_metric_group_handle_t hMetricGroup);
37 
38   protected:
39     MetricContext &metricContext;
40 
41     // Map holds activated domains and associated metric groups.
42     // Content: <domain number, pair<metric group, is activated on gpu flag>
43     std::map<uint32_t, std::pair<zet_metric_group_handle_t, bool>> domains;
44 };
45 
46 struct MetricContextImp : public MetricContext {
47   public:
48     MetricContextImp(Device &device);
49     ~MetricContextImp() override;
50 
51     bool loadDependencies() override;
52     bool isInitialized() override;
53     void setInitializationState(const ze_result_t state) override;
54     Device &getDevice() override;
55     MetricsLibrary &getMetricsLibrary() override;
56     MetricEnumeration &getMetricEnumeration() override;
57     MetricStreamer *getMetricStreamer() override;
58     void setMetricStreamer(MetricStreamer *pMetricStreamer) override;
59     void setMetricsLibrary(MetricsLibrary &metricsLibrary) override;
60     void setMetricEnumeration(MetricEnumeration &metricEnumeration) override;
61 
62     ze_result_t activateMetricGroups() override;
63     ze_result_t activateMetricGroupsDeferred(const uint32_t count,
64                                              zet_metric_group_handle_t *phMetricGroups) override;
65     bool isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) override;
66     bool isMetricGroupActivated() override;
67 
68     void setUseCompute(const bool useCompute) override;
69     bool isComputeUsed() override;
70     uint32_t getSubDeviceIndex() override;
71     void setSubDeviceIndex(const uint32_t index) override;
72     bool isImplicitScalingCapable() override;
73 
74   protected:
75     ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED;
76     struct Device &device;
77     std::unique_ptr<MetricEnumeration> metricEnumeration = nullptr;
78     std::unique_ptr<MetricsLibrary> metricsLibrary = nullptr;
79     MetricGroupDomains metricGroupDomains;
80     MetricStreamer *pMetricStreamer = nullptr;
81     uint32_t subDeviceIndex = 0;
82     bool useCompute = false;
83     bool implicitScalingCapable = false;
84 };
85 
MetricContextImp(Device & deviceInput)86 MetricContextImp::MetricContextImp(Device &deviceInput)
87     : device(deviceInput),
88       metricEnumeration(std::unique_ptr<MetricEnumeration>(new (std::nothrow) MetricEnumeration(*this))),
89       metricsLibrary(std::unique_ptr<MetricsLibrary>(new (std::nothrow) MetricsLibrary(*this))),
90       metricGroupDomains(*this) {
91 
92     auto deviceNeo = deviceInput.getNEODevice();
93     bool isSubDevice = deviceNeo->isSubDevice();
94 
95     subDeviceIndex = isSubDevice
96                          ? static_cast<NEO::SubDevice *>(deviceNeo)->getSubDeviceIndex()
97                          : 0;
98 
99     implicitScalingCapable = !isSubDevice && device.isImplicitScalingCapable();
100 }
101 
~MetricContextImp()102 MetricContextImp::~MetricContextImp() {
103     metricsLibrary.reset();
104     metricEnumeration.reset();
105 }
106 
loadDependencies()107 bool MetricContextImp::loadDependencies() {
108     bool result = true;
109     if (metricEnumeration->loadMetricsDiscovery() != ZE_RESULT_SUCCESS) {
110         result = false;
111         DEBUG_BREAK_IF(!result);
112     }
113     if (result && !metricsLibrary->load()) {
114         result = false;
115         DEBUG_BREAK_IF(!result);
116     }
117 
118     // Set metric context initialization state.
119     setInitializationState(result
120                                ? ZE_RESULT_SUCCESS
121                                : ZE_RESULT_ERROR_UNKNOWN);
122 
123     return result;
124 }
125 
isInitialized()126 bool MetricContextImp::isInitialized() {
127     return initializationState == ZE_RESULT_SUCCESS;
128 }
129 
setInitializationState(const ze_result_t state)130 void MetricContextImp::setInitializationState(const ze_result_t state) {
131     initializationState = state;
132 }
133 
getDevice()134 Device &MetricContextImp::getDevice() { return device; }
135 
getMetricsLibrary()136 MetricsLibrary &MetricContextImp::getMetricsLibrary() { return *metricsLibrary; }
137 
getMetricEnumeration()138 MetricEnumeration &MetricContextImp::getMetricEnumeration() { return *metricEnumeration; }
139 
getMetricStreamer()140 MetricStreamer *MetricContextImp::getMetricStreamer() { return pMetricStreamer; }
141 
setMetricStreamer(MetricStreamer * pMetricStreamer)142 void MetricContextImp::setMetricStreamer(MetricStreamer *pMetricStreamer) {
143     this->pMetricStreamer = pMetricStreamer;
144 }
145 
setMetricsLibrary(MetricsLibrary & metricsLibrary)146 void MetricContextImp::setMetricsLibrary(MetricsLibrary &metricsLibrary) {
147     this->metricsLibrary.release();
148     this->metricsLibrary.reset(&metricsLibrary);
149 }
150 
setMetricEnumeration(MetricEnumeration & metricEnumeration)151 void MetricContextImp::setMetricEnumeration(MetricEnumeration &metricEnumeration) {
152     this->metricEnumeration.release();
153     this->metricEnumeration.reset(&metricEnumeration);
154 }
155 
setUseCompute(const bool useCompute)156 void MetricContextImp::setUseCompute(const bool useCompute) {
157     this->useCompute = useCompute;
158 }
159 
isComputeUsed()160 bool MetricContextImp::isComputeUsed() {
161     return useCompute;
162 }
163 
getSubDeviceIndex()164 uint32_t MetricContextImp::getSubDeviceIndex() {
165     return subDeviceIndex;
166 }
167 
setSubDeviceIndex(const uint32_t index)168 void MetricContextImp::setSubDeviceIndex(const uint32_t index) {
169     subDeviceIndex = index;
170 }
171 
isImplicitScalingCapable()172 bool MetricContextImp::isImplicitScalingCapable() {
173     return implicitScalingCapable;
174 }
175 
176 ze_result_t
activateMetricGroupsDeferred(const uint32_t count,zet_metric_group_handle_t * phMetricGroups)177 MetricContextImp::activateMetricGroupsDeferred(const uint32_t count,
178                                                zet_metric_group_handle_t *phMetricGroups) {
179 
180     // Activation: postpone until zetMetricStreamerOpen or zeCommandQueueExecuteCommandLists
181     // Deactivation: execute immediately.
182     return phMetricGroups ? metricGroupDomains.activateDeferred(subDeviceIndex, count, phMetricGroups)
183                           : metricGroupDomains.deactivate();
184 }
185 
isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup)186 bool MetricContextImp::isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) {
187     return metricGroupDomains.isActivated(hMetricGroup);
188 }
189 
isMetricGroupActivated()190 bool MetricContextImp::isMetricGroupActivated() {
191     return metricGroupDomains.getActivatedCount() > 0;
192 }
193 
activateMetricGroups()194 ze_result_t MetricContextImp::activateMetricGroups() { return metricGroupDomains.activate(); }
195 
enableMetricApi()196 ze_result_t MetricContext::enableMetricApi() {
197 
198     if (!isMetricApiAvailable()) {
199         return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
200     }
201 
202     bool failed = false;
203 
204     auto driverHandle = L0::DriverHandle::fromHandle(GlobalDriverHandle);
205     auto rootDevices = std::vector<ze_device_handle_t>();
206     auto subDevices = std::vector<ze_device_handle_t>();
207 
208     // Obtain root devices.
209     uint32_t rootDeviceCount = 0;
210     driverHandle->getDevice(&rootDeviceCount, nullptr);
211     rootDevices.resize(rootDeviceCount);
212     driverHandle->getDevice(&rootDeviceCount, rootDevices.data());
213 
214     for (auto rootDeviceHandle : rootDevices) {
215 
216         // Initialize root device.
217         auto rootDevice = static_cast<DeviceImp *>(L0::Device::fromHandle(rootDeviceHandle));
218         failed |= !rootDevice->metricContext->loadDependencies();
219 
220         // Initialize sub devices.
221         for (uint32_t i = 0; i < rootDevice->numSubDevices; ++i) {
222             failed |= !rootDevice->subDevices[i]->getMetricContext().loadDependencies();
223         }
224     }
225 
226     return failed
227                ? ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE
228                : ZE_RESULT_SUCCESS;
229 }
230 
create(Device & device)231 std::unique_ptr<MetricContext> MetricContext::create(Device &device) {
232     auto metricContextImp = new (std::nothrow) MetricContextImp(device);
233     std::unique_ptr<MetricContext> metricContext{metricContextImp};
234     return metricContext;
235 }
236 
isMetricApiAvailable()237 bool MetricContext::isMetricApiAvailable() {
238 
239     std::unique_ptr<NEO::OsLibrary> library = nullptr;
240 
241     // Check Metrics Discovery availability.
242     library.reset(NEO::OsLibrary::load(MetricEnumeration::getMetricsDiscoveryFilename()));
243     if (library == nullptr) {
244         PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Unable to find metrics discovery %s\n", MetricEnumeration::getMetricsDiscoveryFilename());
245         return false;
246     }
247 
248     // Check Metrics Library availability.
249     library.reset(NEO::OsLibrary::load(MetricsLibrary::getFilename()));
250     if (library == nullptr) {
251         PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Unable to find metrics library %s\n", MetricsLibrary::getFilename());
252         return false;
253     }
254 
255     return true;
256 }
257 
MetricGroupDomains(MetricContext & metricContext)258 MetricGroupDomains::MetricGroupDomains(MetricContext &metricContext)
259     : metricContext(metricContext) {}
260 
activateDeferred(const uint32_t subDeviceIndex,const uint32_t count,zet_metric_group_handle_t * phMetricGroups)261 ze_result_t MetricGroupDomains::activateDeferred(const uint32_t subDeviceIndex,
262                                                  const uint32_t count,
263                                                  zet_metric_group_handle_t *phMetricGroups) {
264     // For each metric group:
265     for (uint32_t i = 0; i < count; ++i) {
266         DEBUG_BREAK_IF(!phMetricGroups[i]);
267 
268         zet_metric_group_handle_t handle = phMetricGroups[i];
269         auto pMetricGroupImp = static_cast<MetricGroupImp *>(MetricGroup::fromHandle(handle));
270         if (pMetricGroupImp->getMetricGroups().size() > 0) {
271             handle = pMetricGroupImp->getMetricGroups()[subDeviceIndex];
272         }
273 
274         // Try to associate it with a domain (oa, ...).
275         if (!activateMetricGroupDeferred(handle)) {
276             return ZE_RESULT_ERROR_UNKNOWN;
277         }
278     }
279     return ZE_RESULT_SUCCESS;
280 }
281 
activateMetricGroupDeferred(const zet_metric_group_handle_t hMetricGroup)282 bool MetricGroupDomains::activateMetricGroupDeferred(const zet_metric_group_handle_t hMetricGroup) {
283 
284     const auto properites = MetricGroup::getProperties(hMetricGroup);
285     const auto domain = properites.domain;
286 
287     const bool isDomainFree = domains[domain].first == nullptr;
288     const bool isSameGroup = domains[domain].first == hMetricGroup;
289 
290     // The same metric group has been already associated.
291     if (isSameGroup) {
292         return true;
293     }
294 
295     // Domain has been already associated with a different metric group.
296     if (!isDomainFree) {
297         return false;
298     }
299 
300     // Associate metric group with domain and mark it as not active.
301     // Activation will be performed during zeCommandQueueExecuteCommandLists (query)
302     // or zetMetricStreamerOpen (time based sampling).
303     domains[domain].first = hMetricGroup;
304     domains[domain].second = false;
305 
306     return true;
307 }
308 
activate()309 ze_result_t MetricGroupDomains::activate() {
310 
311     // For each domain.
312     for (auto &domain : domains) {
313 
314         auto hMetricGroup = domain.second.first;
315         bool &metricGroupActive = domain.second.second;
316         bool metricGroupEventBased =
317             hMetricGroup && MetricGroup::getProperties(hMetricGroup).samplingType ==
318                                 ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED;
319 
320         // Activate only event based metric groups.
321         // Time based metric group will be activated during zetMetricStreamerOpen.
322         if (metricGroupEventBased && !metricGroupActive) {
323 
324             metricGroupActive = activateEventMetricGroup(hMetricGroup);
325 
326             if (metricGroupActive == false) {
327                 DEBUG_BREAK_IF(true);
328                 return ZE_RESULT_ERROR_UNKNOWN;
329             }
330         }
331     }
332 
333     return ZE_RESULT_SUCCESS;
334 }
335 
activateEventMetricGroup(const zet_metric_group_handle_t hMetricGroup)336 bool MetricGroupDomains::activateEventMetricGroup(const zet_metric_group_handle_t hMetricGroup) {
337     // Obtain metric group configuration handle from metrics library.
338     auto hConfiguration = metricContext.getMetricsLibrary().getConfiguration(hMetricGroup);
339 
340     // Validate metrics library handle.
341     if (!hConfiguration.IsValid()) {
342         DEBUG_BREAK_IF(true);
343         return false;
344     }
345 
346     // Write metric group configuration to gpu.
347     const bool result = metricContext.getMetricsLibrary().activateConfiguration(hConfiguration);
348 
349     DEBUG_BREAK_IF(!result);
350     return result;
351 }
352 
deactivate()353 ze_result_t MetricGroupDomains::deactivate() {
354     // Deactivate metric group for each domain.
355     for (auto &domain : domains) {
356 
357         auto hMetricGroup = domain.second.first;
358         bool metricGroupActivatedOnGpu = domain.second.second;
359 
360         if (metricGroupActivatedOnGpu) {
361             // Only event based metric groups are activated on Gpu.
362             DEBUG_BREAK_IF(MetricGroup::getProperties(hMetricGroup).samplingType != ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED);
363             auto hConfiguration = metricContext.getMetricsLibrary().getConfiguration(hMetricGroup);
364             // Deactivate metric group configuration using metrics library.
365             metricContext.getMetricsLibrary().deactivateConfiguration(hConfiguration);
366         }
367         // Mark domain as free.
368         domain.second = {};
369     }
370 
371     // Check any open queries.
372     if (metricContext.getMetricsLibrary().getMetricQueryCount() == 0) {
373         if (metricContext.getMetricsLibrary().getInitializationState() != ZE_RESULT_ERROR_UNINITIALIZED) {
374             metricContext.getMetricsLibrary().release();
375         }
376     }
377 
378     return ZE_RESULT_SUCCESS;
379 }
380 
isActivated(const zet_metric_group_handle_t hMetricGroup)381 bool MetricGroupDomains::isActivated(const zet_metric_group_handle_t hMetricGroup) {
382     auto metricGroupProperties = MetricGroup::getProperties(hMetricGroup);
383 
384     // 1. Check whether domain is activated.
385     const auto domain = domains.find(metricGroupProperties.domain);
386     if (domain == domains.end()) {
387         return false;
388     }
389 
390     // 2. Check whether the specific MetricGroup is activated.
391     return domain->second.first == hMetricGroup;
392 }
393 
getActivatedCount()394 uint32_t MetricGroupDomains::getActivatedCount() {
395     uint32_t count = 0;
396     for (const auto &domain : domains) {
397         count += domain.second.second ? 1 : 0;
398     }
399     return count;
400 }
401 
metricGroupGet(zet_device_handle_t hDevice,uint32_t * pCount,zet_metric_group_handle_t * phMetricGroups)402 ze_result_t metricGroupGet(zet_device_handle_t hDevice, uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) {
403     auto device = Device::fromHandle(hDevice);
404     return device->getMetricContext().getMetricEnumeration().metricGroupGet(*pCount,
405                                                                             phMetricGroups);
406 }
407 
metricStreamerOpen(zet_context_handle_t hContext,zet_device_handle_t hDevice,zet_metric_group_handle_t hMetricGroup,zet_metric_streamer_desc_t * pDesc,ze_event_handle_t hNotificationEvent,zet_metric_streamer_handle_t * phMetricStreamer)408 ze_result_t metricStreamerOpen(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup,
409                                zet_metric_streamer_desc_t *pDesc, ze_event_handle_t hNotificationEvent,
410                                zet_metric_streamer_handle_t *phMetricStreamer) {
411 
412     return MetricStreamer::open(hContext, hDevice, hMetricGroup, *pDesc, hNotificationEvent, phMetricStreamer);
413 }
414 
415 } // namespace L0
416