1 /**
2  * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "HabanaDeviceManager.h"
18 
19 #include "glow/Runtime/StatsExporter.h"
20 
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/raw_ostream.h"
24 
25 #include "synapse.h"
26 
27 #include <glog/logging.h>
28 #include <limits>
29 
30 using namespace glow;
31 using namespace glow::runtime;
32 
33 namespace glow {
34 namespace runtime {
35 
36 unsigned GlowHabanaMemory = 7 << 20; // 7 GB.
37 
38 static llvm::cl::opt<unsigned, /* ExternalStorage */ true> GlowHabanaMemoryOpt(
39     "glow-habana-memory",
40     llvm::cl::desc("Amount of DRAM to allocate per Habana device in kilobytes"),
41     llvm::cl::location(GlowHabanaMemory));
42 
createHabanaDeviceManager(const DeviceConfig & config)43 DeviceManager *createHabanaDeviceManager(const DeviceConfig &config) {
44   return new HabanaDeviceManager(config);
45 }
46 } // namespace runtime
47 } // namespace glow
48 
49 // Initialization of static class variables.
50 unsigned HabanaDeviceManager::numActiveDevices_{0};
51 std::mutex HabanaDeviceManager::synapseMtx_;
52 std::atomic<RunIdentifierTy> HabanaDeviceManager::runIdentifier_;
53 
HabanaDeviceManager(const DeviceConfig & config,unsigned numRunners,unsigned numWaiters)54 HabanaDeviceManager::HabanaDeviceManager(const DeviceConfig &config,
55                                          unsigned numRunners,
56                                          unsigned numWaiters)
57     : DeviceManager(config), numRunners_(numRunners), numWaiters_(numWaiters) {}
58 
~HabanaDeviceManager()59 HabanaDeviceManager::~HabanaDeviceManager() {
60   // If a device was never successfully acquired, there's nothing to clean up.
61   if (deviceId_ == INVALID_DEVICE) {
62     return;
63   }
64   std::lock_guard<std::mutex> lock(synapseMtx_);
65   numActiveDevices_--;
66   statsExporterRegistry_->incrementCounter(kDevicesUsedHabana, -1);
67 
68   // Explicitly clear this map to force synFree of the managed IOBuffers to
69   // happen now, before we synReleaseDevice.  Otherwise synReleaseDevice will
70   // free the buffers, and then the destructor will try to do it again.
71   functions_.clear();
72   chk_kill(synReleaseDevice(deviceId_));
73 
74   // If this is the last HabanaDeviceManager to be destroyed, destroy the
75   // Synapse API.
76   if (numActiveDevices_ == 0) {
77     chk_kill(synDestroy());
78   }
79 }
80 
init()81 Error HabanaDeviceManager::init() {
82   std::lock_guard<std::mutex> lock(synapseMtx_);
83 
84   // If this is the first HabanaDeviceManager to be created, initialize the
85   // Synapse API.
86   if (numActiveDevices_ == 0) {
87     LOG(INFO) << "Using version " << synGetVersion();
88     // This environment variable tells Synapse to allow enqueueing tensors that
89     // are smaller than the declared size, which offers a significant savings
90     // in PCI traffic for embedding lookups.
91     setenv("IGNORE_ENQUEUE_SIZE_VALIDATION", "1", /*overwrite*/ 1);
92     chk(synInitialize());
93   }
94 
95   // Acquire a device to work with for the lifetime of this instance.
96   synStatus status = synAcquireDevice(&deviceId_, nullptr);
97   if (status != synSuccess) {
98     RETURN_ERR("Failed to acquire device");
99   }
100 
101   numActiveDevices_++;
102   statsExporterRegistry_->incrementCounter(kDevicesUsedHabana);
103 
104   // Fetch initial memory information.
105   RETURN_IF_ERR(updateMemoryUsage());
106 
107   // Create thread pools for running functions and waiting on function results.
108   runPool_ = glow::make_unique<ThreadPool>(numRunners_);
109   waitPool_ = glow::make_unique<ThreadPool>(numWaiters_);
110 
111   if (!runPool_ || !waitPool_) {
112     RETURN_ERR("Failed to create HabanaDeviceManager thread pools");
113   }
114 
115   return Error::success();
116 }
117 
updateMemoryUsage()118 Error HabanaDeviceManager::updateMemoryUsage() {
119   // TODO: Use synGetMemInfo once implemented.
120 
121   // Use GlowHabanaMemory if it is defined from GFLAGS or llvm params,
122   // otherwise, fall back to what config says.
123   uint64_t defaultMemory = 7 << 20;
124   if (GlowHabanaMemory == defaultMemory && config_.getDeviceMemory() != 0) {
125     totalMemory_ = config_.getDeviceMemory();
126   } else {
127     totalMemory_ = uint64_t{GlowHabanaMemory} * 1024;
128   }
129   freeMemory_ = totalMemory_;
130 
131   // Account for the size used by each function loaded on the card.
132   for (const auto &pr : functions_) {
133     const auto &functionMeta = pr.second;
134     const auto &runtimeBundle = functionMeta.function->getRuntimeBundle();
135     freeMemory_ -= runtimeBundle.getConstantWeightSize();
136     freeMemory_ -= runtimeBundle.getMutableWeightSize();
137   }
138 
139   return Error::success();
140 }
141 
addNetwork(const Module * module,FunctionMapTy functions,ReadyCBTy readyCB)142 void HabanaDeviceManager::addNetwork(const Module *module,
143                                      FunctionMapTy functions,
144                                      ReadyCBTy readyCB) {
145   DCHECK(readyCB != nullptr);
146 
147   std::unique_lock<std::mutex> lk(instanceMtx_);
148   for (const auto &func : functions) {
149     // Check if a function with the same name has already been added.
150     if (functions_.count(func.first) != 0) {
151       lk.unlock();
152       readyCB(module,
153               MAKE_ERR(strFormat(
154                   "Failed to add network: already have a function called %s",
155                   func.first.c_str())));
156       return;
157     }
158 
159     uint64_t topologyId = 0;
160     HabanaFunction *habanaFunction = static_cast<HabanaFunction *>(func.second);
161 
162     // Load the recipe (created during compilation) and store the resultant
163     // topology ID. This is the reference that will be used lated to "activate"
164     // this function and make it executable.
165     synStatus status = synFail;
166 
167     {
168       std::lock_guard<std::mutex> lock(synapseMtx_);
169       status = synLoadRecipe(deviceId_, habanaFunction->getRecipeName().c_str(),
170                              &topologyId);
171     }
172 
173     if (auto err = chk_make_err(status)) {
174       LOG(ERROR) << "Unable to load recipe " << habanaFunction->getRecipeName()
175                  << " for function " << func.first << ".";
176       // TODO: Unload functions that were loaded successfully.
177       lk.unlock();
178       readyCB(module, std::move(err));
179       return;
180     }
181 
182     // Insert the function into functions_.
183     bool inserted = false;
184     std::tie(std::ignore, inserted) = functions_.insert(std::make_pair(
185         func.first,
186         HabanaFunctionMeta{topologyId, habanaFunction,
187                            glow::make_unique<HabanaIOBufferPool>(
188                                deviceId_, habanaFunction->getInputs(),
189                                habanaFunction->getOutputs())}));
190 
191     if (!inserted) {
192       // TODO: Unload functions that were loaded successfully.
193       lk.unlock();
194       readyCB(module, MAKE_ERR(strFormat(
195                           "Unable to add function %s to HabanaDeviceManager",
196                           func.first.c_str())));
197       return;
198     }
199 
200     // Optimistically activate the topology if nothing else is loaded.
201     cv_.wait(lk, [this] { return inflightRequests_ == 0; });
202     if (auto err = chk_make_err(synActivateTopology(deviceId_, topologyId))) {
203       lk.unlock();
204       readyCB(module, std::move(err));
205       return;
206     }
207     activeTopo_ = topologyId;
208   }
209 
210   lk.unlock();
211 
212   // Update memory information after loading all the functions.
213   if (auto err = updateMemoryUsage()) {
214     readyCB(module, std::move(err));
215     return;
216   }
217 
218   readyCB(module, Error::success());
219 }
220 
evictNetwork(std::string functionName,EvictFunctionCBTy evictCB)221 void HabanaDeviceManager::evictNetwork(std::string functionName,
222                                        EvictFunctionCBTy evictCB) {
223   DCHECK(evictCB != nullptr);
224 
225   std::unique_lock<std::mutex> lk(instanceMtx_);
226 
227   // Check if a network with the given name exists on the device.
228   if (functions_.count(functionName) == 0) {
229     lk.unlock();
230     evictCB(functionName,
231             MAKE_ERR(strFormat(
232                 "Failed to evict network: function called %s was not added",
233                 functionName.c_str())));
234     return;
235   }
236 
237   // Unload the topology ID corresponding to the function.
238   synStatus status = synFail;
239   uint64_t topologyId = functions_[functionName].topologyId;
240 
241   {
242     std::lock_guard<std::mutex> lock(synapseMtx_);
243     status = synUnloadTopology(deviceId_, topologyId);
244     if (topologyId == activeTopo_) {
245       activeTopo_ = INVALID_TOPOLOGY;
246     }
247   }
248 
249   if (auto err = chk_make_err(status)) {
250     LOG(ERROR) << "Unable to unload function " << functionName;
251     lk.unlock();
252     evictCB(functionName, std::move(err));
253     return;
254   }
255 
256   // Erase the function from the functions_ map.
257   auto numErased = functions_.erase(functionName);
258 
259   if (numErased == 0) {
260     lk.unlock();
261     evictCB(functionName,
262             MAKE_ERR(strFormat(
263                 "Unable to evict function %s from HabanaDeviceManager",
264                 functionName.c_str())));
265     return;
266   }
267 
268   lk.unlock();
269 
270   // Update memory information after evicting the function.
271   if (auto err = updateMemoryUsage()) {
272     evictCB(functionName, std::move(err));
273     return;
274   }
275 
276   evictCB(functionName, Error::success());
277 }
278 
runFunctionImpl(RunIdentifierTy runId,std::string functionName,std::unique_ptr<ExecutionContext> ctx,runtime::ResultCBTy resultCB)279 void HabanaDeviceManager::runFunctionImpl(RunIdentifierTy runId,
280                                           std::string functionName,
281                                           std::unique_ptr<ExecutionContext> ctx,
282                                           runtime::ResultCBTy resultCB) {
283   DCHECK(resultCB != nullptr);
284 
285   TRACE_EVENT_SCOPE_NAMED(ctx->getTraceContext(), TraceLevel::RUNTIME,
286                           "HabanaDM::runnerThread", trEvent);
287 
288   /// Habana DeviceManager doesn't support Device Resident Tensors.
289   ctx->getPlaceholderBindings()->ensureOnHost();
290 
291   if (ctx->getTraceContext()) {
292     ctx->getTraceContext()->setThreadName(
293         llvm::formatv("Habana {0} (enqueue)", deviceId_).str());
294   }
295   // Try to find the function with the given name in functions_.
296   uint64_t topologyId;
297   HabanaFunction *function;
298   HabanaIOBufferPool *ioBufferPool;
299   {
300     std::lock_guard<std::mutex> lock(instanceMtx_);
301     auto it = functions_.find(functionName);
302     if (it == functions_.end()) {
303       resultCB(runId,
304                MAKE_ERR(strFormat(
305                    "Failed to run function: function called %s was not added",
306                    functionName.c_str())),
307                std::move(ctx));
308       return;
309     }
310 
311     topologyId = (it->second).topologyId;
312     function = (it->second).function;
313     ioBufferPool = (it->second).ioBufferPool.get();
314   }
315 
316   // If we need to switch topos, wait to drain the queue.
317   {
318     std::unique_lock<std::mutex> lock(instanceMtx_);
319     if (topologyId != activeTopo_) {
320       // FIXME: This can starve inactive topos.
321       cv_.wait(lock, [this] { return inflightRequests_ == 0; });
322       const auto activateTopoRes = synActivateTopology(deviceId_, topologyId);
323       if (auto err = chk_make_err(activateTopoRes)) {
324         LOG(ERROR) << "synActivateTopology failed with status "
325                    << activateTopoRes;
326         trEvent.addArg(
327             "error", llvm::formatv("synActivateTopology failed with status {0}",
328                                    activateTopoRes)
329                          .str());
330         TRACE_EVENT_SCOPE_END_NAMED(trEvent);
331         resultCB(runId, std::move(err), std::move(ctx));
332         return;
333       }
334       activeTopo_ = topologyId;
335     }
336     inflightRequests_++;
337   }
338 
339   // Execute the function.
340   auto deviceBindings =
341       glow::make_unique<HabanaBindings>(deviceId_, topologyId);
342   deviceBindings->setIOBuffer(ioBufferPool->get());
343   ctx->setDeviceBindings(std::move(deviceBindings));
344 
345   auto executeErr = function->execute(ctx.get());
346   if (executeErr) {
347     trEvent.addArg("error", "execute() failed");
348     TRACE_EVENT_SCOPE_END_NAMED(trEvent);
349     resultCB(runId, std::move(executeErr), std::move(ctx));
350     return;
351   }
352 
353   // Give the handle to the wait thread pool to wait on and call the callback
354   // for.
355   waitPool_->submit([this, runId, function, ioBufferPool,
356                      functionName = std::move(functionName),
357                      ctx = std::move(ctx),
358                      resultCB = std::move(resultCB)]() mutable {
359     DCHECK(resultCB != nullptr);
360 
361     TRACE_EVENT_SCOPE(ctx->getTraceContext(), TraceLevel::RUNTIME,
362                       "HabanaDM::waiterThread");
363     if (ctx->getTraceContext()) {
364       ctx->getTraceContext()->setThreadName(
365           llvm::formatv("Habana {0} (waiter)", deviceId_).str());
366     }
367 
368     TRACE_EVENT_BEGIN(ctx->getTraceContext(), TraceLevel::RUNTIME, "wait");
369     auto &habanaHandle =
370         static_cast<HabanaBindings *>(ctx->getDeviceBindings())->getHandle();
371     bool ok = habanaHandle.wait();
372     std::unique_ptr<HabanaIOBuffer> ioBuffer =
373         static_cast<HabanaBindings *>(ctx->getDeviceBindings())->getIOBuffer();
374     TRACE_EVENT_END(ctx->getTraceContext(), TraceLevel::RUNTIME, "wait");
375 
376     // Notify anything waiting for a topo switch.
377     {
378       std::lock_guard<std::mutex> lock(this->instanceMtx_);
379       inflightRequests_--;
380     }
381     cv_.notify_one();
382 
383     if (!ok) {
384       // Return the IO buffer to the IO buffer pool.
385       ioBufferPool->put(std::move(ioBuffer));
386 
387       resultCB(runId,
388                MAKE_ERR(strFormat("Failed to execute function %s",
389                                   functionName.c_str())),
390                std::move(ctx));
391     } else {
392       // Copy the execution outputs from the designated IO buffer back to the
393       // PlaceholderBindings inside ctx.
394       TRACE_EVENT_SCOPE_NAMED(ctx->getTraceContext(), TraceLevel::RUNTIME,
395                               "copyOutputs", coEvent);
396       auto bindings = ctx->getPlaceholderBindings();
397       size_t tensors{0}, bytes{0};
398       for (const auto &ph : function->getOutputs()) {
399         auto *tensor = bindings->get(ph);
400         if (!tensor) {
401           tensor =
402               bindings->get(bindings->getPlaceholderByNameSlow(ph->getName()));
403         }
404         tensors++;
405 
406         if (auto ioBufferDataOrErr = ioBuffer->get(ph)) {
407           memcpy(tensor->getUnsafePtr(), *ioBufferDataOrErr,
408                  ph->getType()->getSizeInBytes());
409           bytes += ph->getType()->getSizeInBytes();
410         } else {
411           // Return the IO buffer to the IO buffer pool.
412           ioBufferPool->put(std::move(ioBuffer));
413           coEvent.addArg("tensors", std::to_string(tensors));
414           coEvent.addArg("bytes", std::to_string(bytes));
415           coEvent.addArg("missingTensor", ph->getName().str());
416           TRACE_EVENT_SCOPE_END_NAMED(coEvent);
417           resultCB(runId, ioBufferDataOrErr.takeError(), std::move(ctx));
418           return;
419         }
420       }
421       coEvent.addArg("tensors", std::to_string(tensors));
422       coEvent.addArg("bytes", std::to_string(bytes));
423       TRACE_EVENT_SCOPE_END_NAMED(coEvent);
424 
425       // Return the IO buffer to the IO buffer pool.
426       ioBufferPool->put(std::move(ioBuffer));
427       resultCB(runId, Error::success(), std::move(ctx));
428     }
429   });
430 }
431 
432 RunIdentifierTy
runFunction(std::string functionName,std::unique_ptr<ExecutionContext> ctx,runtime::ResultCBTy resultCB)433 HabanaDeviceManager::runFunction(std::string functionName,
434                                  std::unique_ptr<ExecutionContext> ctx,
435                                  runtime::ResultCBTy resultCB) {
436   DCHECK(resultCB != nullptr);
437 
438   RunIdentifierTy runId = runIdentifier_++;
439   runPool_->submit([this, runId, functionName = std::move(functionName),
440                     ctx = std::move(ctx),
441                     resultCB = std::move(resultCB)]() mutable {
442     runFunctionImpl(runId, std::move(functionName), std::move(ctx),
443                     std::move(resultCB));
444   });
445   return runId;
446 }
447 
stop(bool block)448 Error HabanaDeviceManager::stop(bool block) {
449   runPool_->stop(block);
450   waitPool_->stop(block);
451   return Error::success();
452 }
453 
getMaximumMemory() const454 uint64_t HabanaDeviceManager::getMaximumMemory() const { return totalMemory_; }
455 
getAvailableMemory() const456 uint64_t HabanaDeviceManager::getAvailableMemory() const { return freeMemory_; }
457 
isMemoryAvailable(uint64_t estimate) const458 bool HabanaDeviceManager::isMemoryAvailable(uint64_t estimate) const {
459   return estimate <= freeMemory_;
460 }
461 
getDeviceInfo() const462 DeviceInfo HabanaDeviceManager::getDeviceInfo() const {
463   DeviceInfo info = DeviceInfo();
464   info.sramCapacity = 50 * 1024 * 1024;
465   info.peakCompute = 0.45 * 1024 * 1024 * 1024 * 1024;
466   info.peakDramBw = 30.0 * 1024 * 1024 * 1024;
467   info.peakSramBw = 1024.0 * 1024 * 1024 * 1024;
468   info.peakPCIeBw = 16.0 * 1024 * 1024 * 1024;
469   return info;
470 }
471