1 /*
2  * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "NNPIMLTraceWrapper.h"
17 #include "DebugMacros.h"
18 #include "nnpi_ice_caps_hwtrace.h"
19 #include "nnpi_ice_caps_swtrace.h"
20 #include "nnpi_inference.h"
21 #include <chrono>
22 #include <cstring>
23 #include <fstream>
24 #include <memory>
25 #include <sstream>
26 #include <time.h>
27 #include <unistd.h>
28 #include <unordered_map>
29 #include <vector>
30 
31 /// Set default Hardware buffer size to 3GB.
32 #define MAX_HW_TRACE_BUFFER_SIZE_MB (1024 * 3)
33 /// Set default Software buffer size to 400MB.
34 #define MAX_SW_TRACE_BUFFER_SIZE_MB (400)
35 
secondsToMicroseconds(double seconds)36 static inline uint64_t secondsToMicroseconds(double seconds) {
37   return (uint64_t)(seconds * 1e6f);
38 }
39 
nanosecondsToMicrosecondsSigned(int64_t nanoseconds)40 static inline int64_t nanosecondsToMicrosecondsSigned(int64_t nanoseconds) {
41   return nanoseconds / 1e3;
42 }
43 
getNow()44 static uint64_t inline getNow() {
45   return std::chrono::duration_cast<std::chrono::microseconds>(
46              std::chrono::steady_clock::now().time_since_epoch())
47       .count();
48 }
49 
50 static eIceCapsSwTraceEvent swEventTypes[] = {
51     eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_CMDLIST,
52     eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_COPY,
53     eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_CPYLIST_CREATE,
54     eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_ICE_DRV,
55     eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_INFR_SUBRES,
56     eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_INFR_CREATE,
57     eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_INFR_REQ,
58     eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_RUNTIME,
59     eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_USER_DATA};
60 
NNPITraceContext(unsigned devID)61 NNPITraceContext::NNPITraceContext(unsigned devID)
62     : capsSession_(0), devID_(devID), devIDSet_(false) {}
63 
~NNPITraceContext()64 NNPITraceContext::~NNPITraceContext() { destroyInternalContext(); }
65 
startCapture(NNPIDeviceContext deviceContext,bool swTraces,bool hwTraces,uint32_t softwareBufferSizeMB,uint32_t hardwareBufferSizeMB)66 bool NNPITraceContext::startCapture(NNPIDeviceContext deviceContext,
67                                     bool swTraces, bool hwTraces,
68                                     uint32_t softwareBufferSizeMB,
69                                     uint32_t hardwareBufferSizeMB) {
70   if (!createInternalContext(swTraces, hwTraces, softwareBufferSizeMB,
71                              hardwareBufferSizeMB)) {
72     LOG(WARNING) << "nnpi_trace: Failed to create trace device context.";
73     return false;
74   }
75 
76   nnpimlStatus mlStatus = nnpiIceCapsStart(capsSession_);
77   if (mlStatus != NNPIML_SUCCESS) {
78     LOG(WARNING) << "nnpi_trace: Failed to start trace, err=" << mlStatus;
79     return false;
80   }
81   LOG_NNPI_INF_IF_ERROR(
82       nnpiDeviceContextTraceUserData(deviceContext, "BG", getNow()),
83       "Failed to inject trace timestamp - device trace may not be "
84       "synchronized");
85   return true;
86 }
87 
stopCapture(NNPIDeviceContext deviceContext) const88 bool NNPITraceContext::stopCapture(NNPIDeviceContext deviceContext) const {
89   LOG_NNPI_INF_IF_ERROR(
90       nnpiDeviceContextTraceUserData(deviceContext, "EN", getNow()),
91       "Failed to inject trace timestamp - device trace may not be "
92       "synchronized");
93   nnpimlStatus mlStatus = nnpiIceCapsStop(capsSession_);
94   if (mlStatus != NNPIML_SUCCESS) {
95     return false;
96   }
97   return true;
98 }
99 
readTraceOutput()100 bool NNPITraceContext::readTraceOutput() {
101   nnpimlStatus mlStatus = nnpiIceCapsRead(capsSession_);
102   if (mlStatus != NNPIML_SUCCESS) {
103     // Failed to read trace.
104     LOG(WARNING) << "nnpi_trace: Failed to read traces from device, err="
105                  << mlStatus;
106     return false;
107   }
108   mlStatus = nnpiIceCapsParse(capsSession_);
109   if (mlStatus != NNPIML_SUCCESS) {
110     // Failed to read trace.
111     LOG(WARNING) << "nnpi_trace: Failed to parse traces on device, err="
112                  << mlStatus;
113     return false;
114   }
115 
116   mlStatus = nnpiIceCapsProcess(capsSession_);
117   if (mlStatus != NNPIML_SUCCESS) {
118     // Failed to read trace.
119     LOG(WARNING) << "nnpi_trace: Failed to process traces on device, err="
120                  << mlStatus;
121     return false;
122   }
123   size_t entryCount = 0;
124   mlStatus = nnpiIceCapsGetEntriesCount(capsSession_, &entryCount);
125   if (mlStatus != NNPIML_SUCCESS) {
126     // Failed to read trace.
127     LOG(WARNING) << "nnpi_trace: Failed to read traces count, err=" << mlStatus;
128     return false;
129   }
130 
131   bool started = false;
132   uint64_t glowStart = 0;
133   uint64_t glowEnd = 0;
134   uint64_t deviceStart = 0;
135   uint64_t deviceEnd = 0;
136   uint64_t hostStart = 0;
137   uint64_t hostEnd = 0;
138   for (size_t i = 0; i < entryCount; i++) {
139     IceCapsEntry entry;
140     NNPITraceEntry traceEntry;
141     std::stringstream entryStrRep;
142     mlStatus = nnpiIceCapsGetEntry(capsSession_, i, &entry);
143     if (mlStatus != NNPIML_SUCCESS) {
144       // Failed to read trace.
145       LOG(WARNING) << "nnpi_trace: Failed to read trace entries, err="
146                    << mlStatus;
147       return false;
148     }
149 
150     // Set parameters.
151     traceEntry.params["name"] = entry.event_name;
152     traceEntry.params["state"] = entry.state;
153     traceEntry.hostTime = entry.timestamp;
154     traceEntry.engineTime = entry.engine_timestamp;
155     traceEntry.params["engine"] =
156         ((entry.engine == eIceCapsEngine::ICE_CAPS_SW_TRACE)
157              ? std::string("SW")
158              : std::string("HW"));
159     traceEntry.params["event_key"] = std::to_string(entry.event_key);
160     traceEntry.params["device_id"] = std::to_string(entry.device_id);
161     traceEntry.params["context_id"] = std::to_string(entry.context_id);
162     traceEntry.params["network_id"] = std::to_string(entry.network_id);
163     traceEntry.params["infer_id"] = std::to_string(entry.infer_id);
164     if (entry.ice_id >= 0 && entry.ice_id < 12) {
165       // Valid ICE ID.
166       traceEntry.params["ice_id"] = std::to_string(entry.ice_id);
167     }
168 
169     traceEntry.params["core_id"] = std::to_string(entry.core_id);
170     traceEntry.params["network_name"] = entry.network_name;
171     traceEntry.params["kernel_name"] = entry.kernel_name;
172     traceEntry.params["opcode"] = entry.opcode;
173 
174     std::stringstream params;
175     for (size_t p = 0; p < entry.params_count; p++) {
176       IceCapsParam param;
177       mlStatus = nnpiIceCapsGetEntryParam(capsSession_, i, p, &param);
178       if (mlStatus != NNPIML_SUCCESS) {
179         // Failed to read params.
180         LOG(WARNING) << "nnpi_trace: Failed to read trace entry params, err="
181                      << mlStatus;
182         break;
183       }
184       traceEntry.params[param.name] = param.value;
185       params << param.name << ":" << param.value << ", ";
186     }
187 
188     if (entry.state == "created" || entry.state == "queued" ||
189         entry.state == "req" || entry.state == "add") {
190       entry.state = "q";
191     } else if (entry.state == "executed" || entry.state == "cbs" ||
192                entry.state == "start") {
193       entry.state = "s";
194     } else if (entry.state == "completed" || entry.state == "cbc") {
195       entry.state = "c";
196     }
197     traceEntry.params["state"] = entry.state;
198     entries_.push_back(traceEntry);
199     if (entry.event_name == "user_data") {
200       if (!started && traceEntry.params["key"] == "BG") {
201         glowStart = std::stol(traceEntry.params["user_data"]);
202         deviceStart = entry.engine_timestamp;
203         hostStart = entry.timestamp;
204         started = true;
205       } else if (traceEntry.params["key"] == "EN") {
206         glowEnd = std::stol(traceEntry.params["user_data"]);
207         deviceEnd = entry.engine_timestamp;
208         hostEnd = entry.timestamp;
209       }
210     }
211   }
212   // Sync clocks:
213   if (glowStart > 0 && glowEnd > 0 && hostStart > 0 && hostEnd > 0 &&
214       deviceStart > 0 && deviceEnd > 0) {
215     // Calculate host time function for host time.
216     double hostM =
217         (double)(glowEnd - glowStart) / (double)(hostEnd - hostStart);
218     double deviceM =
219         (double)(glowEnd - glowStart) / (double)(deviceEnd - deviceStart);
220     int64_t hostC = glowStart - hostM * hostStart;
221     int64_t deviceC = glowStart - deviceM * deviceStart;
222     // Update host time.
223     for (NNPITraceEntry &entry : entries_) {
224       entry.hostTime = entry.hostTime * hostM + hostC;
225       entry.engineTime = entry.engineTime * deviceM + deviceC;
226     }
227   } else {
228     LOG(WARNING) << "Failed to synchronize glow and nnpi device traces.";
229   }
230   return true;
231 }
232 
load()233 bool NNPITraceContext::load() {
234   entries_.clear();
235   std::stringstream inputStream;
236 
237   if (!readTraceOutput()) {
238     destroyInternalContext();
239     return false;
240   }
241   destroyInternalContext();
242   return true;
243 }
244 
destroyInternalContext()245 bool NNPITraceContext::destroyInternalContext() {
246   if (capsSession_ == 0) {
247     return false;
248   }
249   nnpimlStatus mlStatus = nnpiIceCapsCloseSession(capsSession_);
250   capsSession_ = 0;
251   if (mlStatus != NNPIML_SUCCESS) {
252     LOG(WARNING) << "nnpi_trace: Failed to stop device trace session, err="
253                  << mlStatus;
254     capsSession_ = 0;
255     return false;
256   }
257 
258   return true;
259 }
260 
createInternalContext(bool swTraces,bool hwTraces,uint32_t softwareBufferSizeMB,uint32_t hardwareBufferSizeMB)261 bool NNPITraceContext::createInternalContext(bool swTraces, bool hwTraces,
262                                              uint32_t softwareBufferSizeMB,
263                                              uint32_t hardwareBufferSizeMB) {
264   if (capsSession_ != 0) {
265     return false;
266   }
267   nnpimlStatus mlStatus = nnpiIceCapsOpenSession(&capsSession_);
268   if (mlStatus != NNPIML_SUCCESS) {
269     LOG(WARNING) << "nnpi_trace: Failed to trace session, err=" << mlStatus;
270     capsSession_ = 0;
271     return false;
272   }
273   devMask_ = 1UL << devID_;
274   if (swTraces) {
275     uint64_t maxBufferSize = softwareBufferSizeMB > 0
276                                  ? softwareBufferSizeMB
277                                  : MAX_SW_TRACE_BUFFER_SIZE_MB;
278     size_t swEventsCount = sizeof(swEventTypes) / sizeof(swEventTypes[0]);
279     size_t idx = 0;
280 
281     IceCapsSwTraceConfig traceConfigs[1 + swEventsCount];
282     traceConfigs[idx].traceOptions.config_type =
283         eIceCapsSwTraceConfigType::ICE_CAPS_SWTRACE_OPTIONS;
284     traceConfigs[idx].traceOptions.device_mask = devMask_;
285     // Software traces engine max_bytes is in bytes while maxBufferSize is in
286     // megabytes.
287     traceConfigs[idx].traceOptions.max_bytes = maxBufferSize * 1024 * 1024;
288     idx++;
289     for (size_t i = 0; i < swEventsCount; i++) {
290       traceConfigs[idx].traceEvent.config_type =
291           eIceCapsSwTraceConfigType::ICE_CAPS_SWTRACE_EVENT;
292       traceConfigs[idx].traceEvent.event = swEventTypes[i];
293       idx++;
294     }
295 
296     IceCapsConfig iceSWCapsConfig;
297     iceSWCapsConfig.engine = eIceCapsEngine::ICE_CAPS_SW_TRACE;
298     iceSWCapsConfig.size = sizeof(traceConfigs);
299     iceSWCapsConfig.buffer = traceConfigs;
300     mlStatus = nnpiIceCapsPrepare(capsSession_, &iceSWCapsConfig);
301     if (mlStatus != NNPIML_SUCCESS) {
302       LOG(WARNING)
303           << "nnpi_trace: Failed to set device Software trace options, err="
304           << mlStatus;
305       destroyInternalContext();
306       return false;
307     }
308   }
309   if (hwTraces) {
310     uint64_t maxBufferSize = hardwareBufferSizeMB > 0
311                                  ? hardwareBufferSizeMB
312                                  : MAX_HW_TRACE_BUFFER_SIZE_MB;
313     IceCapsHwTraceConfig traceConfigs[2];
314     traceConfigs[0].traceOptions.config_type =
315         eIceCapsHwTraceConfigType::ICE_CAPS_HWTRACE_OPTIONS;
316     traceConfigs[0].traceOptions.device_mask = devMask_;
317     // Hardware traces engine max_trace_size is in megabytes.
318     traceConfigs[0].traceOptions.max_trace_size = maxBufferSize;
319     traceConfigs[1].iceFilter.config_type =
320         eIceCapsHwTraceConfigType::ICE_CAPS_HWTRACE_FILTER;
321     traceConfigs[1].iceFilter.ice_mask = 0xFFF; // All ICEs.
322     traceConfigs[1].iceFilter.filter_type =
323         eIceCapsHwTraceFilter::ICE_CAPS_HWTRACE_CAPTURE_ALL;
324 
325     IceCapsConfig iceHWCapsConfig;
326     iceHWCapsConfig.engine = eIceCapsEngine::ICE_CAPS_HW_TRACE;
327     iceHWCapsConfig.size = sizeof(traceConfigs);
328     iceHWCapsConfig.buffer = traceConfigs;
329 
330     mlStatus = nnpiIceCapsPrepare(capsSession_, &iceHWCapsConfig);
331     if (mlStatus != NNPIML_SUCCESS) {
332       LOG(WARNING)
333           << "nnpi_trace: Failed to set device Hardware trace options, err="
334           << mlStatus;
335       destroyInternalContext();
336       return false;
337     }
338   }
339 
340   return true;
341 }
342