1 /*
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "NNPIMLTraceWrapper.h"
17 #include "DebugMacros.h"
18 #include "nnpi_ice_caps_hwtrace.h"
19 #include "nnpi_ice_caps_swtrace.h"
20 #include "nnpi_inference.h"
21 #include <chrono>
22 #include <cstring>
23 #include <fstream>
24 #include <memory>
25 #include <sstream>
26 #include <time.h>
27 #include <unistd.h>
28 #include <unordered_map>
29 #include <vector>
30
31 /// Set default Hardware buffer size to 3GB.
32 #define MAX_HW_TRACE_BUFFER_SIZE_MB (1024 * 3)
33 /// Set default Software buffer size to 400MB.
34 #define MAX_SW_TRACE_BUFFER_SIZE_MB (400)
35
secondsToMicroseconds(double seconds)36 static inline uint64_t secondsToMicroseconds(double seconds) {
37 return (uint64_t)(seconds * 1e6f);
38 }
39
nanosecondsToMicrosecondsSigned(int64_t nanoseconds)40 static inline int64_t nanosecondsToMicrosecondsSigned(int64_t nanoseconds) {
41 return nanoseconds / 1e3;
42 }
43
getNow()44 static uint64_t inline getNow() {
45 return std::chrono::duration_cast<std::chrono::microseconds>(
46 std::chrono::steady_clock::now().time_since_epoch())
47 .count();
48 }
49
50 static eIceCapsSwTraceEvent swEventTypes[] = {
51 eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_CMDLIST,
52 eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_COPY,
53 eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_CPYLIST_CREATE,
54 eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_ICE_DRV,
55 eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_INFR_SUBRES,
56 eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_INFR_CREATE,
57 eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_INFR_REQ,
58 eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_RUNTIME,
59 eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_USER_DATA};
60
NNPITraceContext(unsigned devID)61 NNPITraceContext::NNPITraceContext(unsigned devID)
62 : capsSession_(0), devID_(devID), devIDSet_(false) {}
63
~NNPITraceContext()64 NNPITraceContext::~NNPITraceContext() { destroyInternalContext(); }
65
startCapture(NNPIDeviceContext deviceContext,bool swTraces,bool hwTraces,uint32_t softwareBufferSizeMB,uint32_t hardwareBufferSizeMB)66 bool NNPITraceContext::startCapture(NNPIDeviceContext deviceContext,
67 bool swTraces, bool hwTraces,
68 uint32_t softwareBufferSizeMB,
69 uint32_t hardwareBufferSizeMB) {
70 if (!createInternalContext(swTraces, hwTraces, softwareBufferSizeMB,
71 hardwareBufferSizeMB)) {
72 LOG(WARNING) << "nnpi_trace: Failed to create trace device context.";
73 return false;
74 }
75
76 nnpimlStatus mlStatus = nnpiIceCapsStart(capsSession_);
77 if (mlStatus != NNPIML_SUCCESS) {
78 LOG(WARNING) << "nnpi_trace: Failed to start trace, err=" << mlStatus;
79 return false;
80 }
81 LOG_NNPI_INF_IF_ERROR(
82 nnpiDeviceContextTraceUserData(deviceContext, "BG", getNow()),
83 "Failed to inject trace timestamp - device trace may not be "
84 "synchronized");
85 return true;
86 }
87
stopCapture(NNPIDeviceContext deviceContext) const88 bool NNPITraceContext::stopCapture(NNPIDeviceContext deviceContext) const {
89 LOG_NNPI_INF_IF_ERROR(
90 nnpiDeviceContextTraceUserData(deviceContext, "EN", getNow()),
91 "Failed to inject trace timestamp - device trace may not be "
92 "synchronized");
93 nnpimlStatus mlStatus = nnpiIceCapsStop(capsSession_);
94 if (mlStatus != NNPIML_SUCCESS) {
95 return false;
96 }
97 return true;
98 }
99
readTraceOutput()100 bool NNPITraceContext::readTraceOutput() {
101 nnpimlStatus mlStatus = nnpiIceCapsRead(capsSession_);
102 if (mlStatus != NNPIML_SUCCESS) {
103 // Failed to read trace.
104 LOG(WARNING) << "nnpi_trace: Failed to read traces from device, err="
105 << mlStatus;
106 return false;
107 }
108 mlStatus = nnpiIceCapsParse(capsSession_);
109 if (mlStatus != NNPIML_SUCCESS) {
110 // Failed to read trace.
111 LOG(WARNING) << "nnpi_trace: Failed to parse traces on device, err="
112 << mlStatus;
113 return false;
114 }
115
116 mlStatus = nnpiIceCapsProcess(capsSession_);
117 if (mlStatus != NNPIML_SUCCESS) {
118 // Failed to read trace.
119 LOG(WARNING) << "nnpi_trace: Failed to process traces on device, err="
120 << mlStatus;
121 return false;
122 }
123 size_t entryCount = 0;
124 mlStatus = nnpiIceCapsGetEntriesCount(capsSession_, &entryCount);
125 if (mlStatus != NNPIML_SUCCESS) {
126 // Failed to read trace.
127 LOG(WARNING) << "nnpi_trace: Failed to read traces count, err=" << mlStatus;
128 return false;
129 }
130
131 bool started = false;
132 uint64_t glowStart = 0;
133 uint64_t glowEnd = 0;
134 uint64_t deviceStart = 0;
135 uint64_t deviceEnd = 0;
136 uint64_t hostStart = 0;
137 uint64_t hostEnd = 0;
138 for (size_t i = 0; i < entryCount; i++) {
139 IceCapsEntry entry;
140 NNPITraceEntry traceEntry;
141 std::stringstream entryStrRep;
142 mlStatus = nnpiIceCapsGetEntry(capsSession_, i, &entry);
143 if (mlStatus != NNPIML_SUCCESS) {
144 // Failed to read trace.
145 LOG(WARNING) << "nnpi_trace: Failed to read trace entries, err="
146 << mlStatus;
147 return false;
148 }
149
150 // Set parameters.
151 traceEntry.params["name"] = entry.event_name;
152 traceEntry.params["state"] = entry.state;
153 traceEntry.hostTime = entry.timestamp;
154 traceEntry.engineTime = entry.engine_timestamp;
155 traceEntry.params["engine"] =
156 ((entry.engine == eIceCapsEngine::ICE_CAPS_SW_TRACE)
157 ? std::string("SW")
158 : std::string("HW"));
159 traceEntry.params["event_key"] = std::to_string(entry.event_key);
160 traceEntry.params["device_id"] = std::to_string(entry.device_id);
161 traceEntry.params["context_id"] = std::to_string(entry.context_id);
162 traceEntry.params["network_id"] = std::to_string(entry.network_id);
163 traceEntry.params["infer_id"] = std::to_string(entry.infer_id);
164 if (entry.ice_id >= 0 && entry.ice_id < 12) {
165 // Valid ICE ID.
166 traceEntry.params["ice_id"] = std::to_string(entry.ice_id);
167 }
168
169 traceEntry.params["core_id"] = std::to_string(entry.core_id);
170 traceEntry.params["network_name"] = entry.network_name;
171 traceEntry.params["kernel_name"] = entry.kernel_name;
172 traceEntry.params["opcode"] = entry.opcode;
173
174 std::stringstream params;
175 for (size_t p = 0; p < entry.params_count; p++) {
176 IceCapsParam param;
177 mlStatus = nnpiIceCapsGetEntryParam(capsSession_, i, p, ¶m);
178 if (mlStatus != NNPIML_SUCCESS) {
179 // Failed to read params.
180 LOG(WARNING) << "nnpi_trace: Failed to read trace entry params, err="
181 << mlStatus;
182 break;
183 }
184 traceEntry.params[param.name] = param.value;
185 params << param.name << ":" << param.value << ", ";
186 }
187
188 if (entry.state == "created" || entry.state == "queued" ||
189 entry.state == "req" || entry.state == "add") {
190 entry.state = "q";
191 } else if (entry.state == "executed" || entry.state == "cbs" ||
192 entry.state == "start") {
193 entry.state = "s";
194 } else if (entry.state == "completed" || entry.state == "cbc") {
195 entry.state = "c";
196 }
197 traceEntry.params["state"] = entry.state;
198 entries_.push_back(traceEntry);
199 if (entry.event_name == "user_data") {
200 if (!started && traceEntry.params["key"] == "BG") {
201 glowStart = std::stol(traceEntry.params["user_data"]);
202 deviceStart = entry.engine_timestamp;
203 hostStart = entry.timestamp;
204 started = true;
205 } else if (traceEntry.params["key"] == "EN") {
206 glowEnd = std::stol(traceEntry.params["user_data"]);
207 deviceEnd = entry.engine_timestamp;
208 hostEnd = entry.timestamp;
209 }
210 }
211 }
212 // Sync clocks:
213 if (glowStart > 0 && glowEnd > 0 && hostStart > 0 && hostEnd > 0 &&
214 deviceStart > 0 && deviceEnd > 0) {
215 // Calculate host time function for host time.
216 double hostM =
217 (double)(glowEnd - glowStart) / (double)(hostEnd - hostStart);
218 double deviceM =
219 (double)(glowEnd - glowStart) / (double)(deviceEnd - deviceStart);
220 int64_t hostC = glowStart - hostM * hostStart;
221 int64_t deviceC = glowStart - deviceM * deviceStart;
222 // Update host time.
223 for (NNPITraceEntry &entry : entries_) {
224 entry.hostTime = entry.hostTime * hostM + hostC;
225 entry.engineTime = entry.engineTime * deviceM + deviceC;
226 }
227 } else {
228 LOG(WARNING) << "Failed to synchronize glow and nnpi device traces.";
229 }
230 return true;
231 }
232
load()233 bool NNPITraceContext::load() {
234 entries_.clear();
235 std::stringstream inputStream;
236
237 if (!readTraceOutput()) {
238 destroyInternalContext();
239 return false;
240 }
241 destroyInternalContext();
242 return true;
243 }
244
destroyInternalContext()245 bool NNPITraceContext::destroyInternalContext() {
246 if (capsSession_ == 0) {
247 return false;
248 }
249 nnpimlStatus mlStatus = nnpiIceCapsCloseSession(capsSession_);
250 capsSession_ = 0;
251 if (mlStatus != NNPIML_SUCCESS) {
252 LOG(WARNING) << "nnpi_trace: Failed to stop device trace session, err="
253 << mlStatus;
254 capsSession_ = 0;
255 return false;
256 }
257
258 return true;
259 }
260
createInternalContext(bool swTraces,bool hwTraces,uint32_t softwareBufferSizeMB,uint32_t hardwareBufferSizeMB)261 bool NNPITraceContext::createInternalContext(bool swTraces, bool hwTraces,
262 uint32_t softwareBufferSizeMB,
263 uint32_t hardwareBufferSizeMB) {
264 if (capsSession_ != 0) {
265 return false;
266 }
267 nnpimlStatus mlStatus = nnpiIceCapsOpenSession(&capsSession_);
268 if (mlStatus != NNPIML_SUCCESS) {
269 LOG(WARNING) << "nnpi_trace: Failed to trace session, err=" << mlStatus;
270 capsSession_ = 0;
271 return false;
272 }
273 devMask_ = 1UL << devID_;
274 if (swTraces) {
275 uint64_t maxBufferSize = softwareBufferSizeMB > 0
276 ? softwareBufferSizeMB
277 : MAX_SW_TRACE_BUFFER_SIZE_MB;
278 size_t swEventsCount = sizeof(swEventTypes) / sizeof(swEventTypes[0]);
279 size_t idx = 0;
280
281 IceCapsSwTraceConfig traceConfigs[1 + swEventsCount];
282 traceConfigs[idx].traceOptions.config_type =
283 eIceCapsSwTraceConfigType::ICE_CAPS_SWTRACE_OPTIONS;
284 traceConfigs[idx].traceOptions.device_mask = devMask_;
285 // Software traces engine max_bytes is in bytes while maxBufferSize is in
286 // megabytes.
287 traceConfigs[idx].traceOptions.max_bytes = maxBufferSize * 1024 * 1024;
288 idx++;
289 for (size_t i = 0; i < swEventsCount; i++) {
290 traceConfigs[idx].traceEvent.config_type =
291 eIceCapsSwTraceConfigType::ICE_CAPS_SWTRACE_EVENT;
292 traceConfigs[idx].traceEvent.event = swEventTypes[i];
293 idx++;
294 }
295
296 IceCapsConfig iceSWCapsConfig;
297 iceSWCapsConfig.engine = eIceCapsEngine::ICE_CAPS_SW_TRACE;
298 iceSWCapsConfig.size = sizeof(traceConfigs);
299 iceSWCapsConfig.buffer = traceConfigs;
300 mlStatus = nnpiIceCapsPrepare(capsSession_, &iceSWCapsConfig);
301 if (mlStatus != NNPIML_SUCCESS) {
302 LOG(WARNING)
303 << "nnpi_trace: Failed to set device Software trace options, err="
304 << mlStatus;
305 destroyInternalContext();
306 return false;
307 }
308 }
309 if (hwTraces) {
310 uint64_t maxBufferSize = hardwareBufferSizeMB > 0
311 ? hardwareBufferSizeMB
312 : MAX_HW_TRACE_BUFFER_SIZE_MB;
313 IceCapsHwTraceConfig traceConfigs[2];
314 traceConfigs[0].traceOptions.config_type =
315 eIceCapsHwTraceConfigType::ICE_CAPS_HWTRACE_OPTIONS;
316 traceConfigs[0].traceOptions.device_mask = devMask_;
317 // Hardware traces engine max_trace_size is in megabytes.
318 traceConfigs[0].traceOptions.max_trace_size = maxBufferSize;
319 traceConfigs[1].iceFilter.config_type =
320 eIceCapsHwTraceConfigType::ICE_CAPS_HWTRACE_FILTER;
321 traceConfigs[1].iceFilter.ice_mask = 0xFFF; // All ICEs.
322 traceConfigs[1].iceFilter.filter_type =
323 eIceCapsHwTraceFilter::ICE_CAPS_HWTRACE_CAPTURE_ALL;
324
325 IceCapsConfig iceHWCapsConfig;
326 iceHWCapsConfig.engine = eIceCapsEngine::ICE_CAPS_HW_TRACE;
327 iceHWCapsConfig.size = sizeof(traceConfigs);
328 iceHWCapsConfig.buffer = traceConfigs;
329
330 mlStatus = nnpiIceCapsPrepare(capsSession_, &iceHWCapsConfig);
331 if (mlStatus != NNPIML_SUCCESS) {
332 LOG(WARNING)
333 << "nnpi_trace: Failed to set device Hardware trace options, err="
334 << mlStatus;
335 destroyInternalContext();
336 return false;
337 }
338 }
339
340 return true;
341 }
342