1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
#include "device.h"
#include "private.h"
#include "rtl.h"

#include <cassert>
#include <climits>
#include <cstdio>
#include <mutex>
#include <string>
21 
/// Copy constructor.
///
/// The device identity, the init status and the mapping tables are copied from
/// \p D. The once-flag and the three mutexes are value-initialized instead of
/// being copied, so the new object starts with its own synchronization state.
DeviceTy::DeviceTy(const DeviceTy &D)
    : // Identity of the device and the plugin that drives it.
      DeviceID(D.DeviceID),
      RTL(D.RTL),
      RTLDeviceID(D.RTLDeviceID),
      // Initialization status is copied, the once-flag is fresh.
      IsInit(D.IsInit),
      InitFlag(),
      HasPendingGlobals(D.HasPendingGlobals),
      // Mapping tables.
      HostDataToTargetMap(D.HostDataToTargetMap),
      PendingCtorsDtors(D.PendingCtorsDtors),
      ShadowPtrMap(D.ShadowPtrMap),
      // Fresh synchronization objects.
      DataMapMtx(),
      PendingGlobalsMtx(),
      ShadowMtx(),
      LoopTripCnt(D.LoopTripCnt) {}
29 
/// Copy assignment.
///
/// Assigns the same set of members the copy constructor copies; the once-flag
/// and the mutexes of the destination are left untouched.
DeviceTy &DeviceTy::operator=(const DeviceTy &D) {
  // Identity of the device and the plugin that drives it.
  DeviceID = D.DeviceID;
  RTL = D.RTL;
  RTLDeviceID = D.RTLDeviceID;

  // Status flags.
  IsInit = D.IsInit;
  HasPendingGlobals = D.HasPendingGlobals;

  // Mapping tables and per-device bookkeeping.
  HostDataToTargetMap = D.HostDataToTargetMap;
  PendingCtorsDtors = D.PendingCtorsDtors;
  ShadowPtrMap = D.ShadowPtrMap;
  LoopTripCnt = D.LoopTripCnt;

  return *this;
}
43 
DeviceTy(RTLInfoTy * RTL)44 DeviceTy::DeviceTy(RTLInfoTy *RTL)
45     : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
46       HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(),
47       ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), ShadowMtx() {}
48 
~DeviceTy()49 DeviceTy::~DeviceTy() {
50   if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
51     return;
52 
53   ident_t loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
54   dumpTargetPointerMappings(&loc, *this);
55 }
56 
associatePtr(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size)57 int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
58   DataMapMtx.lock();
59 
60   // Check if entry exists
61   auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
62   if (search != HostDataToTargetMap.end()) {
63     // Mapping already exists
64     bool isValid = search->HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
65                    search->TgtPtrBegin == (uintptr_t)TgtPtrBegin;
66     DataMapMtx.unlock();
67     if (isValid) {
68       DP("Attempt to re-associate the same device ptr+offset with the same "
69          "host ptr, nothing to do\n");
70       return OFFLOAD_SUCCESS;
71     } else {
72       REPORT("Not allowed to re-associate a different device ptr+offset with "
73              "the same host ptr\n");
74       return OFFLOAD_FAIL;
75     }
76   }
77 
78   // Mapping does not exist, allocate it with refCount=INF
79   const HostDataToTargetTy &newEntry =
80       *HostDataToTargetMap
81            .emplace(
82                /*HstPtrBase=*/(uintptr_t)HstPtrBegin,
83                /*HstPtrBegin=*/(uintptr_t)HstPtrBegin,
84                /*HstPtrEnd=*/(uintptr_t)HstPtrBegin + Size,
85                /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin, /*Name=*/nullptr,
86                /*IsRefCountINF=*/true)
87            .first;
88   DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD
89      ", HstEnd=" DPxMOD ", TgtBegin=" DPxMOD ", RefCount=%s\n",
90      DPxPTR(newEntry.HstPtrBase), DPxPTR(newEntry.HstPtrBegin),
91      DPxPTR(newEntry.HstPtrEnd), DPxPTR(newEntry.TgtPtrBegin),
92      newEntry.refCountToStr().c_str());
93   (void)newEntry;
94 
95   DataMapMtx.unlock();
96 
97   return OFFLOAD_SUCCESS;
98 }
99 
disassociatePtr(void * HstPtrBegin)100 int DeviceTy::disassociatePtr(void *HstPtrBegin) {
101   DataMapMtx.lock();
102 
103   auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
104   if (search != HostDataToTargetMap.end()) {
105     // Mapping exists
106     if (search->isRefCountInf()) {
107       DP("Association found, removing it\n");
108       HostDataToTargetMap.erase(search);
109       DataMapMtx.unlock();
110       return OFFLOAD_SUCCESS;
111     } else {
112       REPORT("Trying to disassociate a pointer which was not mapped via "
113              "omp_target_associate_ptr\n");
114     }
115   }
116 
117   // Mapping not found
118   DataMapMtx.unlock();
119   REPORT("Association not found\n");
120   return OFFLOAD_FAIL;
121 }
122 
lookupMapping(void * HstPtrBegin,int64_t Size)123 LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
124   uintptr_t hp = (uintptr_t)HstPtrBegin;
125   LookupResult lr;
126 
127   DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n",
128      DPxPTR(hp), Size);
129 
130   if (HostDataToTargetMap.empty())
131     return lr;
132 
133   auto upper = HostDataToTargetMap.upper_bound(hp);
134   // check the left bin
135   if (upper != HostDataToTargetMap.begin()) {
136     lr.Entry = std::prev(upper);
137     auto &HT = *lr.Entry;
138     // Is it contained?
139     lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
140                            (hp + Size) <= HT.HstPtrEnd;
141     // Does it extend beyond the mapped region?
142     lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
143   }
144 
145   // check the right bin
146   if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) &&
147       upper != HostDataToTargetMap.end()) {
148     lr.Entry = upper;
149     auto &HT = *lr.Entry;
150     // Does it extend into an already mapped region?
151     lr.Flags.ExtendsBefore =
152         hp < HT.HstPtrBegin && (hp + Size) > HT.HstPtrBegin;
153     // Does it extend beyond the mapped region?
154     lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
155   }
156 
157   if (lr.Flags.ExtendsBefore) {
158     DP("WARNING: Pointer is not mapped but section extends into already "
159        "mapped data\n");
160   }
161   if (lr.Flags.ExtendsAfter) {
162     DP("WARNING: Pointer is already mapped but section extends beyond mapped "
163        "region\n");
164   }
165 
166   return lr;
167 }
168 
169 TargetPointerResultTy
getTargetPointer(void * HstPtrBegin,void * HstPtrBase,int64_t Size,map_var_info_t HstPtrName,MoveDataStateTy MoveData,bool IsImplicit,bool UpdateRefCount,bool HasCloseModifier,bool HasPresentModifier,AsyncInfoTy & AsyncInfo)170 DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
171                            map_var_info_t HstPtrName, MoveDataStateTy MoveData,
172                            bool IsImplicit, bool UpdateRefCount,
173                            bool HasCloseModifier, bool HasPresentModifier,
174                            AsyncInfoTy &AsyncInfo) {
175   void *TargetPointer = nullptr;
176   bool IsHostPtr = false;
177   bool IsNew = false;
178 
179   DataMapMtx.lock();
180 
181   LookupResult LR = lookupMapping(HstPtrBegin, Size);
182   auto Entry = LR.Entry;
183 
184   // Check if the pointer is contained.
185   // If a variable is mapped to the device manually by the user - which would
186   // lead to the IsContained flag to be true - then we must ensure that the
187   // device address is returned even under unified memory conditions.
188   if (LR.Flags.IsContained ||
189       ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) {
190     auto &HT = *LR.Entry;
191     assert(HT.getRefCount() > 0 && "expected existing RefCount > 0");
192     if (UpdateRefCount)
193       // After this, RefCount > 1.
194       HT.incRefCount();
195     else
196       // It might have been allocated with the parent, but it's still new.
197       IsNew = HT.getRefCount() == 1;
198     uintptr_t Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
199     INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
200          "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
201          ", "
202          "Size=%" PRId64 ", RefCount=%s (%s), Name=%s\n",
203          (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(Ptr),
204          Size, HT.refCountToStr().c_str(),
205          UpdateRefCount ? "incremented" : "update suppressed",
206          (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
207     TargetPointer = (void *)Ptr;
208   } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) {
209     // Explicit extension of mapped data - not allowed.
210     MESSAGE("explicit extension not allowed: host address specified is " DPxMOD
211             " (%" PRId64
212             " bytes), but device allocation maps to host at " DPxMOD
213             " (%" PRId64 " bytes)",
214             DPxPTR(HstPtrBegin), Size, DPxPTR(Entry->HstPtrBegin),
215             Entry->HstPtrEnd - Entry->HstPtrBegin);
216     if (HasPresentModifier)
217       MESSAGE("device mapping required by 'present' map type modifier does not "
218               "exist for host address " DPxMOD " (%" PRId64 " bytes)",
219               DPxPTR(HstPtrBegin), Size);
220   } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
221              !HasCloseModifier) {
222     // If unified shared memory is active, implicitly mapped variables that are
223     // not privatized use host address. Any explicitly mapped variables also use
224     // host address where correctness is not impeded. In all other cases maps
225     // are respected.
226     // In addition to the mapping rules above, the close map modifier forces the
227     // mapping of the variable to the device.
228     if (Size) {
229       DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
230          "memory\n",
231          DPxPTR((uintptr_t)HstPtrBegin), Size);
232       IsHostPtr = true;
233       TargetPointer = HstPtrBegin;
234     }
235   } else if (HasPresentModifier) {
236     DP("Mapping required by 'present' map type modifier does not exist for "
237        "HstPtrBegin=" DPxMOD ", Size=%" PRId64 "\n",
238        DPxPTR(HstPtrBegin), Size);
239     MESSAGE("device mapping required by 'present' map type modifier does not "
240             "exist for host address " DPxMOD " (%" PRId64 " bytes)",
241             DPxPTR(HstPtrBegin), Size);
242   } else if (Size) {
243     // If it is not contained and Size > 0, we should create a new entry for it.
244     IsNew = true;
245     uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin);
246     Entry = HostDataToTargetMap
247                 .emplace((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
248                          (uintptr_t)HstPtrBegin + Size, Ptr, HstPtrName)
249                 .first;
250     INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
251          "Creating new map entry with "
252          "HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, "
253          "RefCount=%s, Name=%s\n",
254          DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size, Entry->refCountToStr().c_str(),
255          (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
256     TargetPointer = (void *)Ptr;
257   }
258 
259   if (IsNew && MoveData == MoveDataStateTy::UNKNOWN)
260     MoveData = MoveDataStateTy::REQUIRED;
261 
262   // If the target pointer is valid, and we need to transfer data, issue the
263   // data transfer.
264   if (TargetPointer && (MoveData == MoveDataStateTy::REQUIRED)) {
265     // Lock the entry before releasing the mapping table lock such that another
266     // thread that could issue data movement will get the right result.
267     Entry->lock();
268     // Release the mapping table lock right after the entry is locked.
269     DataMapMtx.unlock();
270 
271     DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size,
272        DPxPTR(HstPtrBegin), DPxPTR(TargetPointer));
273 
274     int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
275 
276     // Unlock the entry immediately after the data movement is issued.
277     Entry->unlock();
278 
279     if (Ret != OFFLOAD_SUCCESS) {
280       REPORT("Copying data to device failed.\n");
281       // We will also return nullptr if the data movement fails because that
282       // pointer points to a corrupted memory region so it doesn't make any
283       // sense to continue to use it.
284       TargetPointer = nullptr;
285     }
286   } else {
287     // Release the mapping table lock directly.
288     DataMapMtx.unlock();
289   }
290 
291   return {{IsNew, IsHostPtr}, Entry, TargetPointer};
292 }
293 
294 // Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
295 // Return the target pointer begin (where the data will be moved).
296 // Decrement the reference counter if called from targetDataEnd.
getTgtPtrBegin(void * HstPtrBegin,int64_t Size,bool & IsLast,bool UpdateRefCount,bool & IsHostPtr,bool MustContain,bool ForceDelete)297 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
298                                bool UpdateRefCount, bool &IsHostPtr,
299                                bool MustContain, bool ForceDelete) {
300   void *rc = NULL;
301   IsHostPtr = false;
302   IsLast = false;
303   DataMapMtx.lock();
304   LookupResult lr = lookupMapping(HstPtrBegin, Size);
305 
306   if (lr.Flags.IsContained ||
307       (!MustContain && (lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter))) {
308     auto &HT = *lr.Entry;
309     // We do not decrement the reference count to zero here.  deallocTgtPtr does
310     // that atomically with removing the mapping.  Otherwise, before this thread
311     // removed the mapping in deallocTgtPtr, another thread could retrieve the
312     // mapping, increment and decrement back to zero, and then both threads
313     // would try to remove the mapping, resulting in a double free.
314     IsLast = HT.decShouldRemove(ForceDelete);
315     const char *RefCountAction;
316     if (!UpdateRefCount) {
317       RefCountAction = "update suppressed";
318     } else if (ForceDelete) {
319       HT.resetRefCount();
320       assert(IsLast == HT.decShouldRemove() &&
321              "expected correct IsLast prediction for reset");
322       if (IsLast)
323         RefCountAction = "reset, deferred final decrement";
324       else
325         RefCountAction = "reset";
326     } else if (IsLast) {
327       RefCountAction = "deferred final decrement";
328     } else {
329       RefCountAction = "decremented";
330       HT.decRefCount();
331     }
332     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
333     INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
334          "Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
335          "Size=%" PRId64 ", RefCount=%s (%s)\n",
336          DPxPTR(HstPtrBegin), DPxPTR(tp), Size, HT.refCountToStr().c_str(),
337          RefCountAction);
338     rc = (void *)tp;
339   } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
340     // If the value isn't found in the mapping and unified shared memory
341     // is on then it means we have stumbled upon a value which we need to
342     // use directly from the host.
343     DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
344        "memory\n",
345        DPxPTR((uintptr_t)HstPtrBegin), Size);
346     IsHostPtr = true;
347     rc = HstPtrBegin;
348   }
349 
350   DataMapMtx.unlock();
351   return rc;
352 }
353 
354 // Return the target pointer begin (where the data will be moved).
355 // Lock-free version called when loading global symbols from the fat binary.
getTgtPtrBegin(void * HstPtrBegin,int64_t Size)356 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
357   uintptr_t hp = (uintptr_t)HstPtrBegin;
358   LookupResult lr = lookupMapping(HstPtrBegin, Size);
359   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
360     auto &HT = *lr.Entry;
361     uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
362     return (void *)tp;
363   }
364 
365   return NULL;
366 }
367 
deallocTgtPtr(void * HstPtrBegin,int64_t Size,bool HasCloseModifier)368 int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size,
369                             bool HasCloseModifier) {
370   if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
371       !HasCloseModifier)
372     return OFFLOAD_SUCCESS;
373   // Check if the pointer is contained in any sub-nodes.
374   int rc;
375   DataMapMtx.lock();
376   LookupResult lr = lookupMapping(HstPtrBegin, Size);
377   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
378     auto &HT = *lr.Entry;
379     if (HT.decRefCount() == 0) {
380       DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
381          DPxPTR(HT.TgtPtrBegin), Size);
382       deleteData((void *)HT.TgtPtrBegin);
383       INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
384            "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
385            ", Size=%" PRId64 ", Name=%s\n",
386            DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size,
387            (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str()
388                            : "unknown");
389       HostDataToTargetMap.erase(lr.Entry);
390     }
391     rc = OFFLOAD_SUCCESS;
392   } else {
393     REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the"
394            " allocated memory\n",
395            DPxPTR(HstPtrBegin));
396     rc = OFFLOAD_FAIL;
397   }
398 
399   DataMapMtx.unlock();
400   return rc;
401 }
402 
403 /// Init device, should not be called directly.
init()404 void DeviceTy::init() {
405   // Make call to init_requires if it exists for this plugin.
406   if (RTL->init_requires)
407     RTL->init_requires(PM->RTLs.RequiresFlags);
408   int32_t Ret = RTL->init_device(RTLDeviceID);
409   if (Ret != OFFLOAD_SUCCESS)
410     return;
411 
412   IsInit = true;
413 }
414 
415 /// Thread-safe method to initialize the device only once.
initOnce()416 int32_t DeviceTy::initOnce() {
417   std::call_once(InitFlag, &DeviceTy::init, this);
418 
419   // At this point, if IsInit is true, then either this thread or some other
420   // thread in the past successfully initialized the device, so we can return
421   // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
422   // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
423   // that some other thread already attempted to execute init() and if IsInit
424   // is still false, return OFFLOAD_FAIL.
425   if (IsInit)
426     return OFFLOAD_SUCCESS;
427   else
428     return OFFLOAD_FAIL;
429 }
430 
431 // Load binary to device.
load_binary(void * Img)432 __tgt_target_table *DeviceTy::load_binary(void *Img) {
433   RTL->Mtx.lock();
434   __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
435   RTL->Mtx.unlock();
436   return rc;
437 }
438 
allocData(int64_t Size,void * HstPtr,int32_t Kind)439 void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
440   return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
441 }
442 
deleteData(void * TgtPtrBegin)443 int32_t DeviceTy::deleteData(void *TgtPtrBegin) {
444   return RTL->data_delete(RTLDeviceID, TgtPtrBegin);
445 }
446 
447 // Submit data to device
submitData(void * TgtPtrBegin,void * HstPtrBegin,int64_t Size,AsyncInfoTy & AsyncInfo)448 int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
449                              AsyncInfoTy &AsyncInfo) {
450   if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
451     LookupResult LR = lookupMapping(HstPtrBegin, Size);
452     auto *HT = &*LR.Entry;
453 
454     INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
455          "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
456          ", Size=%" PRId64 ", Name=%s\n",
457          DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin), Size,
458          (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
459                                 : "unknown");
460   }
461 
462   if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
463     return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
464   else
465     return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
466                                   AsyncInfo);
467 }
468 
469 // Retrieve data from device
retrieveData(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size,AsyncInfoTy & AsyncInfo)470 int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
471                                int64_t Size, AsyncInfoTy &AsyncInfo) {
472   if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
473     LookupResult LR = lookupMapping(HstPtrBegin, Size);
474     auto *HT = &*LR.Entry;
475     INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
476          "Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD
477          ", Size=%" PRId64 ", Name=%s\n",
478          DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin), Size,
479          (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
480                                 : "unknown");
481   }
482 
483   if (!RTL->data_retrieve_async || !RTL->synchronize)
484     return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
485   else
486     return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
487                                     AsyncInfo);
488 }
489 
490 // Copy data from current device to destination device directly
dataExchange(void * SrcPtr,DeviceTy & DstDev,void * DstPtr,int64_t Size,AsyncInfoTy & AsyncInfo)491 int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
492                                int64_t Size, AsyncInfoTy &AsyncInfo) {
493   if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) {
494     assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
495     return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
496                               Size);
497   } else
498     return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
499                                     DstPtr, Size, AsyncInfo);
500 }
501 
502 // Run region on device
runRegion(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,int32_t TgtVarsSize,AsyncInfoTy & AsyncInfo)503 int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr,
504                             ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
505                             AsyncInfoTy &AsyncInfo) {
506   if (!RTL->run_region || !RTL->synchronize)
507     return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
508                            TgtVarsSize);
509   else
510     return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
511                                  TgtOffsets, TgtVarsSize, AsyncInfo);
512 }
513 
514 // Run region on device
printDeviceInfo(int32_t RTLDevId)515 bool DeviceTy::printDeviceInfo(int32_t RTLDevId) {
516   if (!RTL->print_device_info)
517     return false;
518   RTL->print_device_info(RTLDevId);
519   return true;
520 }
521 
522 // Run team region on device.
runTeamRegion(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,int32_t TgtVarsSize,int32_t NumTeams,int32_t ThreadLimit,uint64_t LoopTripCount,AsyncInfoTy & AsyncInfo)523 int32_t DeviceTy::runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
524                                 ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
525                                 int32_t NumTeams, int32_t ThreadLimit,
526                                 uint64_t LoopTripCount,
527                                 AsyncInfoTy &AsyncInfo) {
528   if (!RTL->run_team_region_async || !RTL->synchronize)
529     return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
530                                 TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
531                                 LoopTripCount);
532   else
533     return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
534                                       TgtOffsets, TgtVarsSize, NumTeams,
535                                       ThreadLimit, LoopTripCount, AsyncInfo);
536 }
537 
538 // Whether data can be copied to DstDevice directly
isDataExchangable(const DeviceTy & DstDevice)539 bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
540   if (RTL != DstDevice.RTL || !RTL->is_data_exchangable)
541     return false;
542 
543   if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID))
544     return (RTL->data_exchange != nullptr) ||
545            (RTL->data_exchange_async != nullptr);
546 
547   return false;
548 }
549 
synchronize(AsyncInfoTy & AsyncInfo)550 int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) {
551   if (RTL->synchronize)
552     return RTL->synchronize(RTLDeviceID, AsyncInfo);
553   return OFFLOAD_SUCCESS;
554 }
555 
556 /// Check whether a device has an associated RTL and initialize it if it's not
557 /// already initialized.
device_is_ready(int device_num)558 bool device_is_ready(int device_num) {
559   DP("Checking whether device %d is ready.\n", device_num);
560   // Devices.size() can only change while registering a new
561   // library, so try to acquire the lock of RTLs' mutex.
562   PM->RTLsMtx.lock();
563   size_t DevicesSize = PM->Devices.size();
564   PM->RTLsMtx.unlock();
565   if (DevicesSize <= (size_t)device_num) {
566     DP("Device ID  %d does not have a matching RTL\n", device_num);
567     return false;
568   }
569 
570   // Get device info
571   DeviceTy &Device = PM->Devices[device_num];
572 
573   DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
574      Device.RTLDeviceID, Device.IsInit);
575 
576   // Init the device if not done before
577   if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
578     DP("Failed to init device %d\n", device_num);
579     return false;
580   }
581 
582   DP("Device %d is ready to use.\n", device_num);
583 
584   return true;
585 }
586