1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "device.h"
14 #include "private.h"
15 #include "rtl.h"
16 
17 #include <cassert>
18 #include <climits>
19 #include <string>
20 
/// Map between Device ID (i.e. openmp device id) and its DeviceTy.
/// device_is_ready() reads its size while holding RTLsMtx and indexes it by
/// device number.
DevicesTy Devices;
23 
associatePtr(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size)24 int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
25   DataMapMtx.lock();
26 
27   // Check if entry exists
28   auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
29   if (search != HostDataToTargetMap.end()) {
30     // Mapping already exists
31     bool isValid = search->HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
32                    search->TgtPtrBegin == (uintptr_t)TgtPtrBegin;
33     DataMapMtx.unlock();
34     if (isValid) {
35       DP("Attempt to re-associate the same device ptr+offset with the same "
36          "host ptr, nothing to do\n");
37       return OFFLOAD_SUCCESS;
38     } else {
39       DP("Not allowed to re-associate a different device ptr+offset with the "
40          "same host ptr\n");
41       return OFFLOAD_FAIL;
42     }
43   }
44 
45   // Mapping does not exist, allocate it with refCount=INF
46   HostDataToTargetTy newEntry((uintptr_t) HstPtrBegin /*HstPtrBase*/,
47                               (uintptr_t) HstPtrBegin /*HstPtrBegin*/,
48                               (uintptr_t) HstPtrBegin + Size /*HstPtrEnd*/,
49                               (uintptr_t) TgtPtrBegin /*TgtPtrBegin*/,
50                               true /*IsRefCountINF*/);
51 
52   DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", HstEnd="
53       DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(newEntry.HstPtrBase),
54       DPxPTR(newEntry.HstPtrBegin), DPxPTR(newEntry.HstPtrEnd),
55       DPxPTR(newEntry.TgtPtrBegin));
56   HostDataToTargetMap.insert(newEntry);
57 
58   DataMapMtx.unlock();
59 
60   return OFFLOAD_SUCCESS;
61 }
62 
disassociatePtr(void * HstPtrBegin)63 int DeviceTy::disassociatePtr(void *HstPtrBegin) {
64   DataMapMtx.lock();
65 
66   auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
67   if (search != HostDataToTargetMap.end()) {
68     // Mapping exists
69     if (search->isRefCountInf()) {
70       DP("Association found, removing it\n");
71       HostDataToTargetMap.erase(search);
72       DataMapMtx.unlock();
73       return OFFLOAD_SUCCESS;
74     } else {
75       DP("Trying to disassociate a pointer which was not mapped via "
76          "omp_target_associate_ptr\n");
77     }
78   }
79 
80   // Mapping not found
81   DataMapMtx.unlock();
82   DP("Association not found\n");
83   return OFFLOAD_FAIL;
84 }
85 
86 // Get ref count of map entry containing HstPtrBegin
getMapEntryRefCnt(void * HstPtrBegin)87 uint64_t DeviceTy::getMapEntryRefCnt(void *HstPtrBegin) {
88   uintptr_t hp = (uintptr_t)HstPtrBegin;
89   uint64_t RefCnt = 0;
90 
91   DataMapMtx.lock();
92   if (!HostDataToTargetMap.empty()) {
93     auto upper = HostDataToTargetMap.upper_bound(hp);
94     if (upper != HostDataToTargetMap.begin()) {
95       upper--;
96       if (hp >= upper->HstPtrBegin && hp < upper->HstPtrEnd) {
97         DP("DeviceTy::getMapEntry: requested entry found\n");
98         RefCnt = upper->getRefCount();
99       }
100     }
101   }
102   DataMapMtx.unlock();
103 
104   if (RefCnt == 0) {
105     DP("DeviceTy::getMapEntry: requested entry not found\n");
106   }
107 
108   return RefCnt;
109 }
110 
lookupMapping(void * HstPtrBegin,int64_t Size)111 LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
112   uintptr_t hp = (uintptr_t)HstPtrBegin;
113   LookupResult lr;
114 
115   DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%ld)...\n", DPxPTR(hp),
116       Size);
117 
118   if (HostDataToTargetMap.empty())
119     return lr;
120 
121   auto upper = HostDataToTargetMap.upper_bound(hp);
122   // check the left bin
123   if (upper != HostDataToTargetMap.begin()) {
124     lr.Entry = std::prev(upper);
125     auto &HT = *lr.Entry;
126     // Is it contained?
127     lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
128         (hp+Size) <= HT.HstPtrEnd;
129     // Does it extend beyond the mapped region?
130     lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
131   }
132 
133   // check the right bin
134   if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) &&
135       upper != HostDataToTargetMap.end()) {
136     lr.Entry = upper;
137     auto &HT = *lr.Entry;
138     // Does it extend into an already mapped region?
139     lr.Flags.ExtendsBefore = hp < HT.HstPtrBegin && (hp+Size) > HT.HstPtrBegin;
140     // Does it extend beyond the mapped region?
141     lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp+Size) > HT.HstPtrEnd;
142   }
143 
144   if (lr.Flags.ExtendsBefore) {
145     DP("WARNING: Pointer is not mapped but section extends into already "
146         "mapped data\n");
147   }
148   if (lr.Flags.ExtendsAfter) {
149     DP("WARNING: Pointer is already mapped but section extends beyond mapped "
150         "region\n");
151   }
152 
153   return lr;
154 }
155 
156 // Used by target_data_begin
157 // Return the target pointer begin (where the data will be moved).
158 // Allocate memory if this is the first occurrence of this mapping.
159 // Increment the reference counter.
160 // If NULL is returned, then either data allocation failed or the user tried
161 // to do an illegal mapping.
getOrAllocTgtPtr(void * HstPtrBegin,void * HstPtrBase,int64_t Size,bool & IsNew,bool & IsHostPtr,bool IsImplicit,bool UpdateRefCount,bool HasCloseModifier)162 void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
163     int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
164     bool UpdateRefCount, bool HasCloseModifier) {
165   void *rc = NULL;
166   IsHostPtr = false;
167   IsNew = false;
168   DataMapMtx.lock();
169   LookupResult lr = lookupMapping(HstPtrBegin, Size);
170 
171   // Check if the pointer is contained.
172   // If a variable is mapped to the device manually by the user - which would
173   // lead to the IsContained flag to be true - then we must ensure that the
174   // device address is returned even under unified memory conditions.
175   if (lr.Flags.IsContained ||
176       ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) {
177     auto &HT = *lr.Entry;
178     IsNew = false;
179 
180     if (UpdateRefCount)
181       HT.incRefCount();
182 
183     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
184     DP("Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
185         "Size=%ld,%s RefCount=%s\n", (IsImplicit ? " (implicit)" : ""),
186         DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
187         (UpdateRefCount ? " updated" : ""),
188         HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
189     rc = (void *)tp;
190   } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) {
191     // Explicit extension of mapped data - not allowed.
192     DP("Explicit extension of mapping is not allowed.\n");
193   } else if (Size) {
194     // If unified shared memory is active, implicitly mapped variables that are not
195     // privatized use host address. Any explicitly mapped variables also use
196     // host address where correctness is not impeded. In all other cases
197     // maps are respected.
198     // In addition to the mapping rules above, the close map
199     // modifier forces the mapping of the variable to the device.
200     if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
201         !HasCloseModifier) {
202       DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
203          DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
204       IsHostPtr = true;
205       rc = HstPtrBegin;
206     } else {
207       // If it is not contained and Size > 0 we should create a new entry for it.
208       IsNew = true;
209       uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
210       DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
211          "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
212          DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
213       HostDataToTargetMap.emplace(
214           HostDataToTargetTy((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
215                              (uintptr_t)HstPtrBegin + Size, tp));
216       rc = (void *)tp;
217     }
218   }
219 
220   DataMapMtx.unlock();
221   return rc;
222 }
223 
224 // Used by target_data_begin, target_data_end, target_data_update and target.
225 // Return the target pointer begin (where the data will be moved).
226 // Decrement the reference counter if called from target_data_end.
getTgtPtrBegin(void * HstPtrBegin,int64_t Size,bool & IsLast,bool UpdateRefCount,bool & IsHostPtr)227 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
228     bool UpdateRefCount, bool &IsHostPtr) {
229   void *rc = NULL;
230   IsHostPtr = false;
231   IsLast = false;
232   DataMapMtx.lock();
233   LookupResult lr = lookupMapping(HstPtrBegin, Size);
234 
235   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
236     auto &HT = *lr.Entry;
237     IsLast = HT.getRefCount() == 1;
238 
239     if (!IsLast && UpdateRefCount)
240       HT.decRefCount();
241 
242     uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
243     DP("Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
244         "Size=%ld,%s RefCount=%s\n", DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
245         (UpdateRefCount ? " updated" : ""),
246         HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
247     rc = (void *)tp;
248   } else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
249     // If the value isn't found in the mapping and unified shared memory
250     // is on then it means we have stumbled upon a value which we need to
251     // use directly from the host.
252     DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
253        DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
254     IsHostPtr = true;
255     rc = HstPtrBegin;
256   }
257 
258   DataMapMtx.unlock();
259   return rc;
260 }
261 
262 // Return the target pointer begin (where the data will be moved).
263 // Lock-free version called when loading global symbols from the fat binary.
getTgtPtrBegin(void * HstPtrBegin,int64_t Size)264 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
265   uintptr_t hp = (uintptr_t)HstPtrBegin;
266   LookupResult lr = lookupMapping(HstPtrBegin, Size);
267   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
268     auto &HT = *lr.Entry;
269     uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
270     return (void *)tp;
271   }
272 
273   return NULL;
274 }
275 
deallocTgtPtr(void * HstPtrBegin,int64_t Size,bool ForceDelete,bool HasCloseModifier)276 int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
277                             bool HasCloseModifier) {
278   if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
279     return OFFLOAD_SUCCESS;
280   // Check if the pointer is contained in any sub-nodes.
281   int rc;
282   DataMapMtx.lock();
283   LookupResult lr = lookupMapping(HstPtrBegin, Size);
284   if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
285     auto &HT = *lr.Entry;
286     if (ForceDelete)
287       HT.resetRefCount();
288     if (HT.decRefCount() == 0) {
289       DP("Deleting tgt data " DPxMOD " of size %ld\n",
290           DPxPTR(HT.TgtPtrBegin), Size);
291       RTL->data_delete(RTLDeviceID, (void *)HT.TgtPtrBegin);
292       DP("Removing%s mapping with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
293           ", Size=%ld\n", (ForceDelete ? " (forced)" : ""),
294           DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size);
295       HostDataToTargetMap.erase(lr.Entry);
296     }
297     rc = OFFLOAD_SUCCESS;
298   } else {
299     DP("Section to delete (hst addr " DPxMOD ") does not exist in the allocated"
300        " memory\n", DPxPTR(HstPtrBegin));
301     rc = OFFLOAD_FAIL;
302   }
303 
304   DataMapMtx.unlock();
305   return rc;
306 }
307 
308 /// Init device, should not be called directly.
init()309 void DeviceTy::init() {
310   // Make call to init_requires if it exists for this plugin.
311   if (RTL->init_requires)
312     RTL->init_requires(RTLs->RequiresFlags);
313   int32_t rc = RTL->init_device(RTLDeviceID);
314   if (rc == OFFLOAD_SUCCESS) {
315     IsInit = true;
316   }
317 }
318 
319 /// Thread-safe method to initialize the device only once.
initOnce()320 int32_t DeviceTy::initOnce() {
321   std::call_once(InitFlag, &DeviceTy::init, this);
322 
323   // At this point, if IsInit is true, then either this thread or some other
324   // thread in the past successfully initialized the device, so we can return
325   // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
326   // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
327   // that some other thread already attempted to execute init() and if IsInit
328   // is still false, return OFFLOAD_FAIL.
329   if (IsInit)
330     return OFFLOAD_SUCCESS;
331   else
332     return OFFLOAD_FAIL;
333 }
334 
335 // Load binary to device.
load_binary(void * Img)336 __tgt_target_table *DeviceTy::load_binary(void *Img) {
337   RTL->Mtx.lock();
338   __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
339   RTL->Mtx.unlock();
340   return rc;
341 }
342 
343 // Submit data to device
data_submit(void * TgtPtrBegin,void * HstPtrBegin,int64_t Size,__tgt_async_info * AsyncInfoPtr)344 int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
345                               int64_t Size, __tgt_async_info *AsyncInfoPtr) {
346   if (!AsyncInfoPtr || !RTL->data_submit_async || !RTL->synchronize)
347     return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
348   else
349     return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
350                                   AsyncInfoPtr);
351 }
352 
353 // Retrieve data from device
data_retrieve(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size,__tgt_async_info * AsyncInfoPtr)354 int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin,
355                                 int64_t Size, __tgt_async_info *AsyncInfoPtr) {
356   if (!AsyncInfoPtr || !RTL->data_retrieve_async || !RTL->synchronize)
357     return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
358   else
359     return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
360                                     AsyncInfoPtr);
361 }
362 
363 // Copy data from current device to destination device directly
data_exchange(void * SrcPtr,DeviceTy DstDev,void * DstPtr,int64_t Size,__tgt_async_info * AsyncInfoPtr)364 int32_t DeviceTy::data_exchange(void *SrcPtr, DeviceTy DstDev, void *DstPtr,
365                                 int64_t Size, __tgt_async_info *AsyncInfoPtr) {
366   if (!AsyncInfoPtr || !RTL->data_exchange_async || !RTL->synchronize) {
367     assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
368     return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
369                               Size);
370   } else
371     return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
372                                     DstPtr, Size, AsyncInfoPtr);
373 }
374 
375 // Run region on device
run_region(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,int32_t TgtVarsSize,__tgt_async_info * AsyncInfoPtr)376 int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
377                              ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
378                              __tgt_async_info *AsyncInfoPtr) {
379   if (!AsyncInfoPtr || !RTL->run_region || !RTL->synchronize)
380     return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
381                            TgtVarsSize);
382   else
383     return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
384                                  TgtOffsets, TgtVarsSize, AsyncInfoPtr);
385 }
386 
387 // Run team region on device.
run_team_region(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,int32_t TgtVarsSize,int32_t NumTeams,int32_t ThreadLimit,uint64_t LoopTripCount,__tgt_async_info * AsyncInfoPtr)388 int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
389                                   ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
390                                   int32_t NumTeams, int32_t ThreadLimit,
391                                   uint64_t LoopTripCount,
392                                   __tgt_async_info *AsyncInfoPtr) {
393   if (!AsyncInfoPtr || !RTL->run_team_region_async || !RTL->synchronize)
394     return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
395                                 TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
396                                 LoopTripCount);
397   else
398     return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
399                                       TgtOffsets, TgtVarsSize, NumTeams,
400                                       ThreadLimit, LoopTripCount, AsyncInfoPtr);
401 }
402 
403 // Whether data can be copied to DstDevice directly
isDataExchangable(const DeviceTy & DstDevice)404 bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
405   if (RTL != DstDevice.RTL || !RTL->is_data_exchangable)
406     return false;
407 
408   if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID))
409     return (RTL->data_exchange != nullptr) ||
410            (RTL->data_exchange_async != nullptr);
411 
412   return false;
413 }
414 
415 /// Check whether a device has an associated RTL and initialize it if it's not
416 /// already initialized.
device_is_ready(int device_num)417 bool device_is_ready(int device_num) {
418   DP("Checking whether device %d is ready.\n", device_num);
419   // Devices.size() can only change while registering a new
420   // library, so try to acquire the lock of RTLs' mutex.
421   RTLsMtx->lock();
422   size_t Devices_size = Devices.size();
423   RTLsMtx->unlock();
424   if (Devices_size <= (size_t)device_num) {
425     DP("Device ID  %d does not have a matching RTL\n", device_num);
426     return false;
427   }
428 
429   // Get device info
430   DeviceTy &Device = Devices[device_num];
431 
432   DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
433        Device.RTLDeviceID, Device.IsInit);
434 
435   // Init the device if not done before
436   if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
437     DP("Failed to init device %d\n", device_num);
438     return false;
439   }
440 
441   DP("Device %d is ready to use.\n", device_num);
442 
443   return true;
444 }
445