1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12
#include "device.h"
#include "private.h"
#include "rtl.h"

#include <cassert>
#include <cinttypes>
#include <climits>
#include <mutex>
#include <string>
20
21 /// Map between Device ID (i.e. openmp device id) and its DeviceTy.
22 DevicesTy Devices;
23
associatePtr(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size)24 int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
25 DataMapMtx.lock();
26
27 // Check if entry exists
28 auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
29 if (search != HostDataToTargetMap.end()) {
30 // Mapping already exists
31 bool isValid = search->HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
32 search->TgtPtrBegin == (uintptr_t)TgtPtrBegin;
33 DataMapMtx.unlock();
34 if (isValid) {
35 DP("Attempt to re-associate the same device ptr+offset with the same "
36 "host ptr, nothing to do\n");
37 return OFFLOAD_SUCCESS;
38 } else {
39 DP("Not allowed to re-associate a different device ptr+offset with the "
40 "same host ptr\n");
41 return OFFLOAD_FAIL;
42 }
43 }
44
45 // Mapping does not exist, allocate it with refCount=INF
46 HostDataToTargetTy newEntry((uintptr_t) HstPtrBegin /*HstPtrBase*/,
47 (uintptr_t) HstPtrBegin /*HstPtrBegin*/,
48 (uintptr_t) HstPtrBegin + Size /*HstPtrEnd*/,
49 (uintptr_t) TgtPtrBegin /*TgtPtrBegin*/,
50 true /*IsRefCountINF*/);
51
52 DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", HstEnd="
53 DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(newEntry.HstPtrBase),
54 DPxPTR(newEntry.HstPtrBegin), DPxPTR(newEntry.HstPtrEnd),
55 DPxPTR(newEntry.TgtPtrBegin));
56 HostDataToTargetMap.insert(newEntry);
57
58 DataMapMtx.unlock();
59
60 return OFFLOAD_SUCCESS;
61 }
62
disassociatePtr(void * HstPtrBegin)63 int DeviceTy::disassociatePtr(void *HstPtrBegin) {
64 DataMapMtx.lock();
65
66 auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
67 if (search != HostDataToTargetMap.end()) {
68 // Mapping exists
69 if (search->isRefCountInf()) {
70 DP("Association found, removing it\n");
71 HostDataToTargetMap.erase(search);
72 DataMapMtx.unlock();
73 return OFFLOAD_SUCCESS;
74 } else {
75 DP("Trying to disassociate a pointer which was not mapped via "
76 "omp_target_associate_ptr\n");
77 }
78 }
79
80 // Mapping not found
81 DataMapMtx.unlock();
82 DP("Association not found\n");
83 return OFFLOAD_FAIL;
84 }
85
86 // Get ref count of map entry containing HstPtrBegin
getMapEntryRefCnt(void * HstPtrBegin)87 uint64_t DeviceTy::getMapEntryRefCnt(void *HstPtrBegin) {
88 uintptr_t hp = (uintptr_t)HstPtrBegin;
89 uint64_t RefCnt = 0;
90
91 DataMapMtx.lock();
92 if (!HostDataToTargetMap.empty()) {
93 auto upper = HostDataToTargetMap.upper_bound(hp);
94 if (upper != HostDataToTargetMap.begin()) {
95 upper--;
96 if (hp >= upper->HstPtrBegin && hp < upper->HstPtrEnd) {
97 DP("DeviceTy::getMapEntry: requested entry found\n");
98 RefCnt = upper->getRefCount();
99 }
100 }
101 }
102 DataMapMtx.unlock();
103
104 if (RefCnt == 0) {
105 DP("DeviceTy::getMapEntry: requested entry not found\n");
106 }
107
108 return RefCnt;
109 }
110
lookupMapping(void * HstPtrBegin,int64_t Size)111 LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
112 uintptr_t hp = (uintptr_t)HstPtrBegin;
113 LookupResult lr;
114
115 DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%ld)...\n", DPxPTR(hp),
116 Size);
117
118 if (HostDataToTargetMap.empty())
119 return lr;
120
121 auto upper = HostDataToTargetMap.upper_bound(hp);
122 // check the left bin
123 if (upper != HostDataToTargetMap.begin()) {
124 lr.Entry = std::prev(upper);
125 auto &HT = *lr.Entry;
126 // Is it contained?
127 lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
128 (hp+Size) <= HT.HstPtrEnd;
129 // Does it extend beyond the mapped region?
130 lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
131 }
132
133 // check the right bin
134 if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) &&
135 upper != HostDataToTargetMap.end()) {
136 lr.Entry = upper;
137 auto &HT = *lr.Entry;
138 // Does it extend into an already mapped region?
139 lr.Flags.ExtendsBefore = hp < HT.HstPtrBegin && (hp+Size) > HT.HstPtrBegin;
140 // Does it extend beyond the mapped region?
141 lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp+Size) > HT.HstPtrEnd;
142 }
143
144 if (lr.Flags.ExtendsBefore) {
145 DP("WARNING: Pointer is not mapped but section extends into already "
146 "mapped data\n");
147 }
148 if (lr.Flags.ExtendsAfter) {
149 DP("WARNING: Pointer is already mapped but section extends beyond mapped "
150 "region\n");
151 }
152
153 return lr;
154 }
155
156 // Used by target_data_begin
157 // Return the target pointer begin (where the data will be moved).
158 // Allocate memory if this is the first occurrence of this mapping.
159 // Increment the reference counter.
160 // If NULL is returned, then either data allocation failed or the user tried
161 // to do an illegal mapping.
getOrAllocTgtPtr(void * HstPtrBegin,void * HstPtrBase,int64_t Size,bool & IsNew,bool & IsHostPtr,bool IsImplicit,bool UpdateRefCount,bool HasCloseModifier)162 void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
163 int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
164 bool UpdateRefCount, bool HasCloseModifier) {
165 void *rc = NULL;
166 IsHostPtr = false;
167 IsNew = false;
168 DataMapMtx.lock();
169 LookupResult lr = lookupMapping(HstPtrBegin, Size);
170
171 // Check if the pointer is contained.
172 // If a variable is mapped to the device manually by the user - which would
173 // lead to the IsContained flag to be true - then we must ensure that the
174 // device address is returned even under unified memory conditions.
175 if (lr.Flags.IsContained ||
176 ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) {
177 auto &HT = *lr.Entry;
178 IsNew = false;
179
180 if (UpdateRefCount)
181 HT.incRefCount();
182
183 uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
184 DP("Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
185 "Size=%ld,%s RefCount=%s\n", (IsImplicit ? " (implicit)" : ""),
186 DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
187 (UpdateRefCount ? " updated" : ""),
188 HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
189 rc = (void *)tp;
190 } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) {
191 // Explicit extension of mapped data - not allowed.
192 DP("Explicit extension of mapping is not allowed.\n");
193 } else if (Size) {
194 // If unified shared memory is active, implicitly mapped variables that are not
195 // privatized use host address. Any explicitly mapped variables also use
196 // host address where correctness is not impeded. In all other cases
197 // maps are respected.
198 // In addition to the mapping rules above, the close map
199 // modifier forces the mapping of the variable to the device.
200 if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
201 !HasCloseModifier) {
202 DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
203 DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
204 IsHostPtr = true;
205 rc = HstPtrBegin;
206 } else {
207 // If it is not contained and Size > 0 we should create a new entry for it.
208 IsNew = true;
209 uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
210 DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
211 "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
212 DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
213 HostDataToTargetMap.emplace(
214 HostDataToTargetTy((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
215 (uintptr_t)HstPtrBegin + Size, tp));
216 rc = (void *)tp;
217 }
218 }
219
220 DataMapMtx.unlock();
221 return rc;
222 }
223
224 // Used by target_data_begin, target_data_end, target_data_update and target.
225 // Return the target pointer begin (where the data will be moved).
226 // Decrement the reference counter if called from target_data_end.
getTgtPtrBegin(void * HstPtrBegin,int64_t Size,bool & IsLast,bool UpdateRefCount,bool & IsHostPtr)227 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
228 bool UpdateRefCount, bool &IsHostPtr) {
229 void *rc = NULL;
230 IsHostPtr = false;
231 IsLast = false;
232 DataMapMtx.lock();
233 LookupResult lr = lookupMapping(HstPtrBegin, Size);
234
235 if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
236 auto &HT = *lr.Entry;
237 IsLast = HT.getRefCount() == 1;
238
239 if (!IsLast && UpdateRefCount)
240 HT.decRefCount();
241
242 uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
243 DP("Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
244 "Size=%ld,%s RefCount=%s\n", DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
245 (UpdateRefCount ? " updated" : ""),
246 HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
247 rc = (void *)tp;
248 } else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
249 // If the value isn't found in the mapping and unified shared memory
250 // is on then it means we have stumbled upon a value which we need to
251 // use directly from the host.
252 DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
253 DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
254 IsHostPtr = true;
255 rc = HstPtrBegin;
256 }
257
258 DataMapMtx.unlock();
259 return rc;
260 }
261
262 // Return the target pointer begin (where the data will be moved).
263 // Lock-free version called when loading global symbols from the fat binary.
getTgtPtrBegin(void * HstPtrBegin,int64_t Size)264 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
265 uintptr_t hp = (uintptr_t)HstPtrBegin;
266 LookupResult lr = lookupMapping(HstPtrBegin, Size);
267 if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
268 auto &HT = *lr.Entry;
269 uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
270 return (void *)tp;
271 }
272
273 return NULL;
274 }
275
deallocTgtPtr(void * HstPtrBegin,int64_t Size,bool ForceDelete,bool HasCloseModifier)276 int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
277 bool HasCloseModifier) {
278 if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
279 return OFFLOAD_SUCCESS;
280 // Check if the pointer is contained in any sub-nodes.
281 int rc;
282 DataMapMtx.lock();
283 LookupResult lr = lookupMapping(HstPtrBegin, Size);
284 if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
285 auto &HT = *lr.Entry;
286 if (ForceDelete)
287 HT.resetRefCount();
288 if (HT.decRefCount() == 0) {
289 DP("Deleting tgt data " DPxMOD " of size %ld\n",
290 DPxPTR(HT.TgtPtrBegin), Size);
291 RTL->data_delete(RTLDeviceID, (void *)HT.TgtPtrBegin);
292 DP("Removing%s mapping with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
293 ", Size=%ld\n", (ForceDelete ? " (forced)" : ""),
294 DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size);
295 HostDataToTargetMap.erase(lr.Entry);
296 }
297 rc = OFFLOAD_SUCCESS;
298 } else {
299 DP("Section to delete (hst addr " DPxMOD ") does not exist in the allocated"
300 " memory\n", DPxPTR(HstPtrBegin));
301 rc = OFFLOAD_FAIL;
302 }
303
304 DataMapMtx.unlock();
305 return rc;
306 }
307
308 /// Init device, should not be called directly.
init()309 void DeviceTy::init() {
310 // Make call to init_requires if it exists for this plugin.
311 if (RTL->init_requires)
312 RTL->init_requires(RTLs->RequiresFlags);
313 int32_t rc = RTL->init_device(RTLDeviceID);
314 if (rc == OFFLOAD_SUCCESS) {
315 IsInit = true;
316 }
317 }
318
319 /// Thread-safe method to initialize the device only once.
initOnce()320 int32_t DeviceTy::initOnce() {
321 std::call_once(InitFlag, &DeviceTy::init, this);
322
323 // At this point, if IsInit is true, then either this thread or some other
324 // thread in the past successfully initialized the device, so we can return
325 // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
326 // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
327 // that some other thread already attempted to execute init() and if IsInit
328 // is still false, return OFFLOAD_FAIL.
329 if (IsInit)
330 return OFFLOAD_SUCCESS;
331 else
332 return OFFLOAD_FAIL;
333 }
334
335 // Load binary to device.
load_binary(void * Img)336 __tgt_target_table *DeviceTy::load_binary(void *Img) {
337 RTL->Mtx.lock();
338 __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
339 RTL->Mtx.unlock();
340 return rc;
341 }
342
343 // Submit data to device
data_submit(void * TgtPtrBegin,void * HstPtrBegin,int64_t Size,__tgt_async_info * AsyncInfoPtr)344 int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
345 int64_t Size, __tgt_async_info *AsyncInfoPtr) {
346 if (!AsyncInfoPtr || !RTL->data_submit_async || !RTL->synchronize)
347 return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
348 else
349 return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
350 AsyncInfoPtr);
351 }
352
353 // Retrieve data from device
data_retrieve(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size,__tgt_async_info * AsyncInfoPtr)354 int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin,
355 int64_t Size, __tgt_async_info *AsyncInfoPtr) {
356 if (!AsyncInfoPtr || !RTL->data_retrieve_async || !RTL->synchronize)
357 return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
358 else
359 return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
360 AsyncInfoPtr);
361 }
362
363 // Copy data from current device to destination device directly
data_exchange(void * SrcPtr,DeviceTy DstDev,void * DstPtr,int64_t Size,__tgt_async_info * AsyncInfoPtr)364 int32_t DeviceTy::data_exchange(void *SrcPtr, DeviceTy DstDev, void *DstPtr,
365 int64_t Size, __tgt_async_info *AsyncInfoPtr) {
366 if (!AsyncInfoPtr || !RTL->data_exchange_async || !RTL->synchronize) {
367 assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
368 return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
369 Size);
370 } else
371 return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
372 DstPtr, Size, AsyncInfoPtr);
373 }
374
375 // Run region on device
run_region(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,int32_t TgtVarsSize,__tgt_async_info * AsyncInfoPtr)376 int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
377 ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
378 __tgt_async_info *AsyncInfoPtr) {
379 if (!AsyncInfoPtr || !RTL->run_region || !RTL->synchronize)
380 return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
381 TgtVarsSize);
382 else
383 return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
384 TgtOffsets, TgtVarsSize, AsyncInfoPtr);
385 }
386
387 // Run team region on device.
run_team_region(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,int32_t TgtVarsSize,int32_t NumTeams,int32_t ThreadLimit,uint64_t LoopTripCount,__tgt_async_info * AsyncInfoPtr)388 int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
389 ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
390 int32_t NumTeams, int32_t ThreadLimit,
391 uint64_t LoopTripCount,
392 __tgt_async_info *AsyncInfoPtr) {
393 if (!AsyncInfoPtr || !RTL->run_team_region_async || !RTL->synchronize)
394 return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
395 TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
396 LoopTripCount);
397 else
398 return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
399 TgtOffsets, TgtVarsSize, NumTeams,
400 ThreadLimit, LoopTripCount, AsyncInfoPtr);
401 }
402
403 // Whether data can be copied to DstDevice directly
isDataExchangable(const DeviceTy & DstDevice)404 bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
405 if (RTL != DstDevice.RTL || !RTL->is_data_exchangable)
406 return false;
407
408 if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID))
409 return (RTL->data_exchange != nullptr) ||
410 (RTL->data_exchange_async != nullptr);
411
412 return false;
413 }
414
415 /// Check whether a device has an associated RTL and initialize it if it's not
416 /// already initialized.
device_is_ready(int device_num)417 bool device_is_ready(int device_num) {
418 DP("Checking whether device %d is ready.\n", device_num);
419 // Devices.size() can only change while registering a new
420 // library, so try to acquire the lock of RTLs' mutex.
421 RTLsMtx->lock();
422 size_t Devices_size = Devices.size();
423 RTLsMtx->unlock();
424 if (Devices_size <= (size_t)device_num) {
425 DP("Device ID %d does not have a matching RTL\n", device_num);
426 return false;
427 }
428
429 // Get device info
430 DeviceTy &Device = Devices[device_num];
431
432 DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
433 Device.RTLDeviceID, Device.IsInit);
434
435 // Init the device if not done before
436 if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
437 DP("Failed to init device %d\n", device_num);
438 return false;
439 }
440
441 DP("Device %d is ready to use.\n", device_num);
442
443 return true;
444 }
445