1 /*
2 * Copyright (C) 2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "level_zero/tools/source/sysman/linux/pmt/pmt.h"
9
10 #include "shared/source/debug_settings/debug_settings_manager.h"
11
12 #include <algorithm>
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <string.h>
16
17 namespace L0 {
18 const std::string PlatformMonitoringTech::baseTelemSysFS("/sys/class/intel_pmt");
19 const std::string PlatformMonitoringTech::telem("telem");
20 uint32_t PlatformMonitoringTech::rootDeviceTelemNodeIndex = 0;
21
readValue(const std::string key,uint32_t & value)22 ze_result_t PlatformMonitoringTech::readValue(const std::string key, uint32_t &value) {
23 auto offset = keyOffsetMap.find(key);
24 if (offset == keyOffsetMap.end()) {
25 return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
26 }
27 int fd = this->openFunction(telemetryDeviceEntry.c_str(), O_RDONLY);
28 if (fd == -1) {
29 return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
30 }
31
32 ze_result_t res = ZE_RESULT_SUCCESS;
33 if (this->preadFunction(fd, &value, sizeof(uint32_t), baseOffset + offset->second) != sizeof(uint32_t)) {
34 res = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
35 }
36
37 if (this->closeFunction(fd) < 0) {
38 return ZE_RESULT_ERROR_UNKNOWN;
39 }
40
41 return res;
42 }
43
readValue(const std::string key,uint64_t & value)44 ze_result_t PlatformMonitoringTech::readValue(const std::string key, uint64_t &value) {
45 auto offset = keyOffsetMap.find(key);
46 if (offset == keyOffsetMap.end()) {
47 return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
48 }
49 int fd = this->openFunction(telemetryDeviceEntry.c_str(), O_RDONLY);
50 if (fd == -1) {
51 return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
52 }
53
54 ze_result_t res = ZE_RESULT_SUCCESS;
55 if (this->preadFunction(fd, &value, sizeof(uint64_t), baseOffset + offset->second) != sizeof(uint64_t)) {
56 res = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
57 }
58
59 if (this->closeFunction(fd) < 0) {
60 return ZE_RESULT_ERROR_UNKNOWN;
61 }
62
63 return res;
64 }
65
// Ordering predicate for telemetry node names of the form "telemN": returns true when the
// numeric index of telemNode1 is strictly smaller than that of telemNode2.
//
// The index is whatever follows the first five characters ("telem") of each name, parsed with
// std::stoi. std::stoi / substr throw if a name is shorter than the prefix or has no leading number.
bool compareTelemNodes(std::string &telemNode1, std::string &telemNode2) {
    const std::string prefix = "telem";
    const auto indexOf = [&prefix](const std::string &nodeName) {
        return std::stoi(nodeName.substr(prefix.size()));
    };

    const int firstIndex = indexOf(telemNode1);
    const int secondIndex = indexOf(telemNode2);
    return firstIndex < secondIndex;
}
74
75 // Check if Telemetry node(say /sys/class/intel_pmt/telem1) and rootPciPathOfGpuDevice share same PCI Root port
isValidTelemNode(FsAccess * pFsAccess,const std::string & rootPciPathOfGpuDevice,const std::string sysfsTelemNode)76 static bool isValidTelemNode(FsAccess *pFsAccess, const std::string &rootPciPathOfGpuDevice, const std::string sysfsTelemNode) {
77 std::string realPathOfTelemNode;
78 auto result = pFsAccess->getRealPath(sysfsTelemNode, realPathOfTelemNode);
79 if (result != ZE_RESULT_SUCCESS) {
80 return false;
81 }
82
83 // Example: If
84 // rootPciPathOfGpuDevice = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0";
85 // realPathOfTelemNode = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem1";
86 // As rootPciPathOfGpuDevice is a substring og realPathOfTelemNode , hence both sysfs telemNode and GPU device share same PCI Root.
87 // Hence this telem node entry is valid for GPU device.
88 return (realPathOfTelemNode.compare(0, rootPciPathOfGpuDevice.size(), rootPciPathOfGpuDevice) == 0);
89 }
90
enumerateRootTelemIndex(FsAccess * pFsAccess,std::string & rootPciPathOfGpuDevice)91 ze_result_t PlatformMonitoringTech::enumerateRootTelemIndex(FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice) {
92 std::vector<std::string> listOfTelemNodes;
93 auto result = pFsAccess->listDirectory(baseTelemSysFS, listOfTelemNodes);
94 if (ZE_RESULT_SUCCESS != result) {
95 return result;
96 }
97
98 // listOfTelemNodes vector could contain non "telem" entries which are not interested to us.
99 // Lets refactor listOfTelemNodes vector as below
100 for (auto iterator = listOfTelemNodes.begin(); iterator != listOfTelemNodes.end(); iterator++) {
101 if (iterator->compare(0, telem.size(), telem) != 0) {
102 listOfTelemNodes.erase(iterator--); // Remove entry if its suffix is not "telem"
103 }
104 }
105
106 // Exmaple: For below directory
107 // # /sys/class/intel_pmt$ ls
108 // telem1 telem2 telem3
109 // Then listOfTelemNodes would contain telem1, telem2, telem3
110 std::sort(listOfTelemNodes.begin(), listOfTelemNodes.end(), compareTelemNodes); // sort listOfTelemNodes, to arange telem nodes in ascending order
111 for (const auto &telemNode : listOfTelemNodes) {
112 if (isValidTelemNode(pFsAccess, rootPciPathOfGpuDevice, baseTelemSysFS + "/" + telemNode)) {
113 auto indexString = telemNode.substr(telem.size(), telemNode.size());
114 rootDeviceTelemNodeIndex = stoi(indexString); // if telemNode is telemN, then rootDeviceTelemNodeIndex = N
115 return ZE_RESULT_SUCCESS;
116 }
117 }
118 return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
119 }
120
init(FsAccess * pFsAccess,const std::string & rootPciPathOfGpuDevice)121 ze_result_t PlatformMonitoringTech::init(FsAccess *pFsAccess, const std::string &rootPciPathOfGpuDevice) {
122 std::string telemNode = telem + std::to_string(rootDeviceTelemNodeIndex);
123 if (isSubdevice) {
124 uint32_t telemNodeIndex = 0;
125 // If rootDeviceTelemNode is telem1, then rootDeviceTelemNodeIndex = 1
126 // And thus for subdevice0 --> telem node will be telem2,
127 // for subdevice1 --> telem node will be telem3 etc
128 telemNodeIndex = rootDeviceTelemNodeIndex + subdeviceId + 1;
129 telemNode = telem + std::to_string(telemNodeIndex);
130 }
131 std::string baseTelemSysFSNode = baseTelemSysFS + "/" + telemNode;
132 if (!isValidTelemNode(pFsAccess, rootPciPathOfGpuDevice, baseTelemSysFSNode)) {
133 return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
134 }
135
136 telemetryDeviceEntry = baseTelemSysFSNode + "/" + telem;
137 if (!pFsAccess->fileExists(telemetryDeviceEntry)) {
138 NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
139 "Telemetry support not available. No file %s\n", telemetryDeviceEntry.c_str());
140 return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
141 }
142
143 std::string guid;
144 std::string guidPath = baseTelemSysFSNode + std::string("/guid");
145 ze_result_t result = pFsAccess->read(guidPath, guid);
146 if (ZE_RESULT_SUCCESS != result) {
147 NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
148 "Telemetry sysfs entry not available %s\n", guidPath.c_str());
149 return result;
150 }
151 result = getKeyOffsetMap(guid, keyOffsetMap);
152 if (ZE_RESULT_SUCCESS != result) {
153 // We didnt have any entry for this guid in guidToKeyOffsetMap
154 return result;
155 }
156
157 std::string offsetPath = baseTelemSysFSNode + std::string("/offset");
158 result = pFsAccess->read(offsetPath, baseOffset);
159 if (ZE_RESULT_SUCCESS != result) {
160 NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
161 "Telemetry sysfs entry not available %s\n", offsetPath.c_str());
162 return result;
163 }
164
165 return ZE_RESULT_SUCCESS;
166 }
167
PlatformMonitoringTech(FsAccess * pFsAccess,ze_bool_t onSubdevice,uint32_t subdeviceId)168 PlatformMonitoringTech::PlatformMonitoringTech(FsAccess *pFsAccess, ze_bool_t onSubdevice,
169 uint32_t subdeviceId) : subdeviceId(subdeviceId), isSubdevice(onSubdevice) {
170 }
171
doInitPmtObject(FsAccess * pFsAccess,uint32_t subdeviceId,PlatformMonitoringTech * pPmt,const std::string & rootPciPathOfGpuDevice,std::map<uint32_t,L0::PlatformMonitoringTech * > & mapOfSubDeviceIdToPmtObject)172 void PlatformMonitoringTech::doInitPmtObject(FsAccess *pFsAccess, uint32_t subdeviceId, PlatformMonitoringTech *pPmt,
173 const std::string &rootPciPathOfGpuDevice,
174 std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject) {
175 if (pPmt->init(pFsAccess, rootPciPathOfGpuDevice) == ZE_RESULT_SUCCESS) {
176 mapOfSubDeviceIdToPmtObject.emplace(subdeviceId, pPmt);
177 return;
178 }
179 delete pPmt; // We are here as pPmt->init failed and thus this pPmt object is not useful. Let's delete that.
180 }
181
create(const std::vector<ze_device_handle_t> & deviceHandles,FsAccess * pFsAccess,std::string & rootPciPathOfGpuDevice,std::map<uint32_t,L0::PlatformMonitoringTech * > & mapOfSubDeviceIdToPmtObject)182 void PlatformMonitoringTech::create(const std::vector<ze_device_handle_t> &deviceHandles,
183 FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice,
184 std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject) {
185 if (ZE_RESULT_SUCCESS == PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess, rootPciPathOfGpuDevice)) {
186 for (const auto &deviceHandle : deviceHandles) {
187 ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
188 Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
189 auto pPmt = new PlatformMonitoringTech(pFsAccess, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE,
190 deviceProperties.subdeviceId);
191 UNRECOVERABLE_IF(nullptr == pPmt);
192 PlatformMonitoringTech::doInitPmtObject(pFsAccess, deviceProperties.subdeviceId, pPmt,
193 rootPciPathOfGpuDevice, mapOfSubDeviceIdToPmtObject);
194 }
195 }
196 }
197
~PlatformMonitoringTech()198 PlatformMonitoringTech::~PlatformMonitoringTech() {
199 }
200
201 } // namespace L0
202