1 /*
2  * Copyright (C) 2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "level_zero/tools/source/sysman/linux/pmt/pmt.h"
9 
10 #include "shared/source/debug_settings/debug_settings_manager.h"
11 
12 #include <algorithm>
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <string.h>
16 
17 namespace L0 {
18 const std::string PlatformMonitoringTech::baseTelemSysFS("/sys/class/intel_pmt");
19 const std::string PlatformMonitoringTech::telem("telem");
20 uint32_t PlatformMonitoringTech::rootDeviceTelemNodeIndex = 0;
21 
readValue(const std::string key,uint32_t & value)22 ze_result_t PlatformMonitoringTech::readValue(const std::string key, uint32_t &value) {
23     auto offset = keyOffsetMap.find(key);
24     if (offset == keyOffsetMap.end()) {
25         return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
26     }
27     int fd = this->openFunction(telemetryDeviceEntry.c_str(), O_RDONLY);
28     if (fd == -1) {
29         return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
30     }
31 
32     ze_result_t res = ZE_RESULT_SUCCESS;
33     if (this->preadFunction(fd, &value, sizeof(uint32_t), baseOffset + offset->second) != sizeof(uint32_t)) {
34         res = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
35     }
36 
37     if (this->closeFunction(fd) < 0) {
38         return ZE_RESULT_ERROR_UNKNOWN;
39     }
40 
41     return res;
42 }
43 
readValue(const std::string key,uint64_t & value)44 ze_result_t PlatformMonitoringTech::readValue(const std::string key, uint64_t &value) {
45     auto offset = keyOffsetMap.find(key);
46     if (offset == keyOffsetMap.end()) {
47         return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
48     }
49     int fd = this->openFunction(telemetryDeviceEntry.c_str(), O_RDONLY);
50     if (fd == -1) {
51         return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
52     }
53 
54     ze_result_t res = ZE_RESULT_SUCCESS;
55     if (this->preadFunction(fd, &value, sizeof(uint64_t), baseOffset + offset->second) != sizeof(uint64_t)) {
56         res = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
57     }
58 
59     if (this->closeFunction(fd) < 0) {
60         return ZE_RESULT_ERROR_UNKNOWN;
61     }
62 
63     return res;
64 }
65 
// Ordering predicate for telemetry node names of the form "telem<N>".
// Returns true when the numeric index of telemNode1 is smaller than that of telemNode2,
// so that e.g. "telem2" sorts before "telem10".
// The text following the first five characters ("telem") is parsed with stoi, which throws
// if it does not start with a number.
bool compareTelemNodes(std::string &telemNode1, std::string &telemNode2) {
    const std::string::size_type prefixLength = std::string("telem").size();
    const int firstIndex = std::stoi(telemNode1.substr(prefixLength));
    const int secondIndex = std::stoi(telemNode2.substr(prefixLength));
    return firstIndex < secondIndex;
}
74 
75 // Check if Telemetry node(say /sys/class/intel_pmt/telem1) and rootPciPathOfGpuDevice share same PCI Root port
isValidTelemNode(FsAccess * pFsAccess,const std::string & rootPciPathOfGpuDevice,const std::string sysfsTelemNode)76 static bool isValidTelemNode(FsAccess *pFsAccess, const std::string &rootPciPathOfGpuDevice, const std::string sysfsTelemNode) {
77     std::string realPathOfTelemNode;
78     auto result = pFsAccess->getRealPath(sysfsTelemNode, realPathOfTelemNode);
79     if (result != ZE_RESULT_SUCCESS) {
80         return false;
81     }
82 
83     // Example: If
84     // rootPciPathOfGpuDevice = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0";
85     // realPathOfTelemNode = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem1";
86     // As rootPciPathOfGpuDevice is a substring og realPathOfTelemNode , hence both sysfs telemNode and GPU device share same PCI Root.
87     // Hence this telem node entry is valid for GPU device.
88     return (realPathOfTelemNode.compare(0, rootPciPathOfGpuDevice.size(), rootPciPathOfGpuDevice) == 0);
89 }
90 
enumerateRootTelemIndex(FsAccess * pFsAccess,std::string & rootPciPathOfGpuDevice)91 ze_result_t PlatformMonitoringTech::enumerateRootTelemIndex(FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice) {
92     std::vector<std::string> listOfTelemNodes;
93     auto result = pFsAccess->listDirectory(baseTelemSysFS, listOfTelemNodes);
94     if (ZE_RESULT_SUCCESS != result) {
95         return result;
96     }
97 
98     // listOfTelemNodes vector could contain non "telem" entries which are not interested to us.
99     // Lets refactor listOfTelemNodes vector as below
100     for (auto iterator = listOfTelemNodes.begin(); iterator != listOfTelemNodes.end(); iterator++) {
101         if (iterator->compare(0, telem.size(), telem) != 0) {
102             listOfTelemNodes.erase(iterator--); // Remove entry if its suffix is not "telem"
103         }
104     }
105 
106     // Exmaple: For below directory
107     // # /sys/class/intel_pmt$ ls
108     // telem1  telem2  telem3
109     // Then listOfTelemNodes would contain telem1, telem2, telem3
110     std::sort(listOfTelemNodes.begin(), listOfTelemNodes.end(), compareTelemNodes); // sort listOfTelemNodes, to arange telem nodes in ascending order
111     for (const auto &telemNode : listOfTelemNodes) {
112         if (isValidTelemNode(pFsAccess, rootPciPathOfGpuDevice, baseTelemSysFS + "/" + telemNode)) {
113             auto indexString = telemNode.substr(telem.size(), telemNode.size());
114             rootDeviceTelemNodeIndex = stoi(indexString); // if telemNode is telemN, then rootDeviceTelemNodeIndex = N
115             return ZE_RESULT_SUCCESS;
116         }
117     }
118     return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
119 }
120 
init(FsAccess * pFsAccess,const std::string & rootPciPathOfGpuDevice)121 ze_result_t PlatformMonitoringTech::init(FsAccess *pFsAccess, const std::string &rootPciPathOfGpuDevice) {
122     std::string telemNode = telem + std::to_string(rootDeviceTelemNodeIndex);
123     if (isSubdevice) {
124         uint32_t telemNodeIndex = 0;
125         // If rootDeviceTelemNode is telem1, then rootDeviceTelemNodeIndex = 1
126         // And thus for subdevice0 --> telem node will be telem2,
127         // for subdevice1 --> telem node will be telem3 etc
128         telemNodeIndex = rootDeviceTelemNodeIndex + subdeviceId + 1;
129         telemNode = telem + std::to_string(telemNodeIndex);
130     }
131     std::string baseTelemSysFSNode = baseTelemSysFS + "/" + telemNode;
132     if (!isValidTelemNode(pFsAccess, rootPciPathOfGpuDevice, baseTelemSysFSNode)) {
133         return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
134     }
135 
136     telemetryDeviceEntry = baseTelemSysFSNode + "/" + telem;
137     if (!pFsAccess->fileExists(telemetryDeviceEntry)) {
138         NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
139                               "Telemetry support not available. No file %s\n", telemetryDeviceEntry.c_str());
140         return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
141     }
142 
143     std::string guid;
144     std::string guidPath = baseTelemSysFSNode + std::string("/guid");
145     ze_result_t result = pFsAccess->read(guidPath, guid);
146     if (ZE_RESULT_SUCCESS != result) {
147         NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
148                               "Telemetry sysfs entry not available %s\n", guidPath.c_str());
149         return result;
150     }
151     result = getKeyOffsetMap(guid, keyOffsetMap);
152     if (ZE_RESULT_SUCCESS != result) {
153         // We didnt have any entry for this guid in guidToKeyOffsetMap
154         return result;
155     }
156 
157     std::string offsetPath = baseTelemSysFSNode + std::string("/offset");
158     result = pFsAccess->read(offsetPath, baseOffset);
159     if (ZE_RESULT_SUCCESS != result) {
160         NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
161                               "Telemetry sysfs entry not available %s\n", offsetPath.c_str());
162         return result;
163     }
164 
165     return ZE_RESULT_SUCCESS;
166 }
167 
PlatformMonitoringTech(FsAccess * pFsAccess,ze_bool_t onSubdevice,uint32_t subdeviceId)168 PlatformMonitoringTech::PlatformMonitoringTech(FsAccess *pFsAccess, ze_bool_t onSubdevice,
169                                                uint32_t subdeviceId) : subdeviceId(subdeviceId), isSubdevice(onSubdevice) {
170 }
171 
doInitPmtObject(FsAccess * pFsAccess,uint32_t subdeviceId,PlatformMonitoringTech * pPmt,const std::string & rootPciPathOfGpuDevice,std::map<uint32_t,L0::PlatformMonitoringTech * > & mapOfSubDeviceIdToPmtObject)172 void PlatformMonitoringTech::doInitPmtObject(FsAccess *pFsAccess, uint32_t subdeviceId, PlatformMonitoringTech *pPmt,
173                                              const std::string &rootPciPathOfGpuDevice,
174                                              std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject) {
175     if (pPmt->init(pFsAccess, rootPciPathOfGpuDevice) == ZE_RESULT_SUCCESS) {
176         mapOfSubDeviceIdToPmtObject.emplace(subdeviceId, pPmt);
177         return;
178     }
179     delete pPmt; // We are here as pPmt->init failed and thus this pPmt object is not useful. Let's delete that.
180 }
181 
create(const std::vector<ze_device_handle_t> & deviceHandles,FsAccess * pFsAccess,std::string & rootPciPathOfGpuDevice,std::map<uint32_t,L0::PlatformMonitoringTech * > & mapOfSubDeviceIdToPmtObject)182 void PlatformMonitoringTech::create(const std::vector<ze_device_handle_t> &deviceHandles,
183                                     FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice,
184                                     std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject) {
185     if (ZE_RESULT_SUCCESS == PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess, rootPciPathOfGpuDevice)) {
186         for (const auto &deviceHandle : deviceHandles) {
187             ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
188             Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
189             auto pPmt = new PlatformMonitoringTech(pFsAccess, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE,
190                                                    deviceProperties.subdeviceId);
191             UNRECOVERABLE_IF(nullptr == pPmt);
192             PlatformMonitoringTech::doInitPmtObject(pFsAccess, deviceProperties.subdeviceId, pPmt,
193                                                     rootPciPathOfGpuDevice, mapOfSubDeviceIdToPmtObject);
194         }
195     }
196 }
197 
~PlatformMonitoringTech()198 PlatformMonitoringTech::~PlatformMonitoringTech() {
199 }
200 
201 } // namespace L0
202