1 //===----------- device.h - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
//
9 // Declarations for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef _OMPTARGET_DEVICE_H
14 #define _OMPTARGET_DEVICE_H
15
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <vector>

#include "omptarget.h"
#include "rtl.h"
27
// Types that are only handled through pointers in this header; declaring them
// here keeps the declarations below independent of their full definitions.
struct RTLInfoTy;
struct __tgt_bin_desc;
struct __tgt_target_table;

// Opaque handle carried alongside a mapping; HostDataToTargetTy stores it as
// the optional source name of the mapped variable.
using map_var_info_t = void *;
34
// enum for OMP_TARGET_OFFLOAD; keep in sync with kmp.h definition
enum kmp_target_offload_kind {
  tgt_disabled = 0,  // offload policy: disabled
  tgt_default = 1,   // offload policy: default
  tgt_mandatory = 2  // offload policy: mandatory
};
// C-style alias so the type can be spelled without the `enum` keyword.
using kmp_target_offload_kind_t = kmp_target_offload_kind;
42
43 /// Map between host data and target data.
44 struct HostDataToTargetTy {
45 uintptr_t HstPtrBase; // host info.
46 uintptr_t HstPtrBegin;
47 uintptr_t HstPtrEnd; // non-inclusive.
48 map_var_info_t HstPtrName; // Optional source name of mapped variable.
49
50 uintptr_t TgtPtrBegin; // target info.
51
52 private:
53 /// use mutable to allow modification via std::set iterator which is const.
54 mutable uint64_t RefCount;
55 static const uint64_t INFRefCount = ~(uint64_t)0;
56 /// This mutex will be locked when data movement is issued. For targets that
57 /// doesn't support async data movement, this mutex can guarantee that after
58 /// it is released, memory region on the target is update to date. For targets
59 /// that support async data movement, this can guarantee that data movement
60 /// has been issued. This mutex *must* be locked right before releasing the
61 /// mapping table lock.
62 std::shared_ptr<std::mutex> UpdateMtx;
63
64 public:
65 HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB,
66 map_var_info_t Name = nullptr, bool IsINF = false)
67 : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name),
68 TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1),
69 UpdateMtx(std::make_shared<std::mutex>()) {}
70
71 uint64_t getRefCount() const { return RefCount; }
72
73 uint64_t resetRefCount() const {
74 if (RefCount != INFRefCount)
75 RefCount = 1;
76
77 return RefCount;
78 }
79
80 uint64_t incRefCount() const {
81 if (RefCount != INFRefCount) {
82 ++RefCount;
83 assert(RefCount < INFRefCount && "refcount overflow");
84 }
85
86 return RefCount;
87 }
88
89 uint64_t decRefCount() const {
90 if (RefCount != INFRefCount) {
91 assert(RefCount > 0 && "refcount underflow");
92 --RefCount;
93 }
94
95 return RefCount;
96 }
97
98 bool isRefCountInf() const { return RefCount == INFRefCount; }
99
100 std::string refCountToStr() const {
101 return isRefCountInf() ? "INF" : std::to_string(getRefCount());
102 }
103
104 /// Should one decrement of the reference count (after resetting it if
105 /// \c AfterReset) remove this mapping?
106 bool decShouldRemove(bool AfterReset = false) const {
107 if (AfterReset)
108 return !isRefCountInf();
109 return getRefCount() == 1;
110 }
111
112 void lock() const { UpdateMtx->lock(); }
113
114 void unlock() const { UpdateMtx->unlock(); }
115 };
116
117 typedef uintptr_t HstPtrBeginTy;
118 inline bool operator<(const HostDataToTargetTy &lhs, const HstPtrBeginTy &rhs) {
rrdtool_graph_merge_args_from_json($rrdtool_graph, $graph_config, $context, $size, $conf_rrds, $conf_graphreport_stats, $conf_graphreport_stat_items)119 return lhs.HstPtrBegin < rhs;
120 }
121 inline bool operator<(const HstPtrBeginTy &lhs, const HostDataToTargetTy &rhs) {
122 return lhs < rhs.HstPtrBegin;
123 }
124 inline bool operator<(const HostDataToTargetTy &lhs,
125 const HostDataToTargetTy &rhs) {
126 return lhs.HstPtrBegin < rhs.HstPtrBegin;
127 }
128
129 typedef std::set<HostDataToTargetTy, std::less<>> HostDataToTargetListTy;
130
131 struct LookupResult {
132 struct {
133 unsigned IsContained : 1;
134 unsigned ExtendsBefore : 1;
135 unsigned ExtendsAfter : 1;
136 } Flags;
137
138 HostDataToTargetListTy::iterator Entry;
139
140 LookupResult() : Flags({0, 0, 0}), Entry() {}
141 };
142
143 /// This struct will be returned by \p DeviceTy::getOrAllocTgtPtr which provides
144 /// more data than just a target pointer.
145 struct TargetPointerResultTy {
146 struct {
147 /// If the map table entry is just created
148 unsigned IsNewEntry : 1;
149 /// If the pointer is actually a host pointer (when unified memory enabled)
150 unsigned IsHostPointer : 1;
151 } Flags = {0, 0};
152
153 /// The iterator to the corresponding map table entry
154 HostDataToTargetListTy::iterator MapTableEntry{};
155
156 /// The corresponding target pointer
157 void *TargetPointer = nullptr;
158 };
159
/// Value stored per shadow pointer: three raw addresses.
// NOTE(review): judging by the names these are the host pointer value, the
// target address holding the pointer, and the target pointer value - confirm.
struct ShadowPtrValTy {
  void *HstPtrVal;
  void *TgtPtrAddr;
  void *TgtPtrVal;
};

/// Map for shadow pointers, keyed on a raw address.
using ShadowPtrListTy = std::map<void *, ShadowPtrValTy>;
167
168 ///
169 struct PendingCtorDtorListsTy {
170 std::list<void *> PendingCtors;
171 std::list<void *> PendingDtors;
172 };
173 typedef std::map<__tgt_bin_desc *, PendingCtorDtorListsTy>
174 PendingCtorsDtorsPerLibrary;
175
/// Tri-state passed as the \p MoveData argument of DeviceTy::getTargetPointer.
// NOTE(review): presumably says whether the data transfer must happen, must
// not happen, or is left for the callee to decide - confirm at the call sites.
enum class MoveDataStateTy : uint32_t {
  REQUIRED,
  NONE,
  UNKNOWN
};
177
178 struct DeviceTy {
179 int32_t DeviceID;
180 RTLInfoTy *RTL;
181 int32_t RTLDeviceID;
182
183 bool IsInit;
184 std::once_flag InitFlag;
185 bool HasPendingGlobals;
186
187 HostDataToTargetListTy HostDataToTargetMap;
188 PendingCtorsDtorsPerLibrary PendingCtorsDtors;
189
190 ShadowPtrListTy ShadowPtrMap;
191
192 std::mutex DataMapMtx, PendingGlobalsMtx, ShadowMtx;
193
194 // NOTE: Once libomp gains full target-task support, this state should be
195 // moved into the target task in libomp.
196 std::map<int32_t, uint64_t> LoopTripCnt;
197
198 DeviceTy(RTLInfoTy *RTL);
199
200 // The existence of mutexes makes DeviceTy non-copyable. We need to
201 // provide a copy constructor and an assignment operator explicitly.
202 DeviceTy(const DeviceTy &D);
203
204 DeviceTy &operator=(const DeviceTy &D);
205
206 ~DeviceTy();
207
208 // Return true if data can be copied to DstDevice directly
209 bool isDataExchangable(const DeviceTy &DstDevice);
210
211 LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
212 /// Get the target pointer based on host pointer begin and base. If the
213 /// mapping already exists, the target pointer will be returned directly. In
214 /// addition, if \p MoveData is true, the memory region pointed by \p
215 /// HstPtrBegin of size \p Size will also be transferred to the device. If the
216 /// mapping doesn't exist, and if unified memory is not enabled, a new mapping
217 /// will be created and the data will also be transferred accordingly. nullptr
218 /// will be returned because of any of following reasons:
219 /// - Data allocation failed;
220 /// - The user tried to do an illegal mapping;
221 /// - Data transfer issue fails.
222 TargetPointerResultTy
223 getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
224 map_var_info_t HstPtrName, MoveDataStateTy MoveData,
225 bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
226 bool HasPresentModifier, AsyncInfoTy &AsyncInfo);
227 void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
228 void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
229 bool UpdateRefCount, bool &IsHostPtr,
230 bool MustContain = false, bool ForceDelete = false);
231 int deallocTgtPtr(void *TgtPtrBegin, int64_t Size,
232 bool HasCloseModifier = false);
233 int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
234 int disassociatePtr(void *HstPtrBegin);
235
236 // calls to RTL
237 int32_t initOnce();
build_aggregate_graph_config_from_url($conf_graph_colors)238 __tgt_target_table *load_binary(void *Img);
239
240 // device memory allocation/deallocation routines
241 /// Allocates \p Size bytes on the device, host or shared memory space
242 /// (depending on \p Kind) and returns the address/nullptr when
243 /// succeeds/fails. \p HstPtr is an address of the host data which the
244 /// allocated target data will be associated with. If it is unknown, the
245 /// default value of \p HstPtr is nullptr. Note: this function doesn't do
246 /// pointer association. Actually, all the __tgt_rtl_data_alloc
247 /// implementations ignore \p HstPtr. \p Kind dictates what allocator should
248 /// be used (host, shared, device).
249 void *allocData(int64_t Size, void *HstPtr = nullptr,
250 int32_t Kind = TARGET_ALLOC_DEFAULT);
251 /// Deallocates memory which \p TgtPtrBegin points at and returns
252 /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
253 int32_t deleteData(void *TgtPtrBegin);
254
255 // Data transfer. When AsyncInfo is nullptr, the transfer will be
256 // synchronous.
257 // Copy data from host to device
258 int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
259 AsyncInfoTy &AsyncInfo);
260 // Copy data from device back to host
261 int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
262 AsyncInfoTy &AsyncInfo);
263 // Copy data from current device to destination device directly
264 int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
265 int64_t Size, AsyncInfoTy &AsyncInfo);
266
267 int32_t runRegion(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets,
268 int32_t TgtVarsSize, AsyncInfoTy &AsyncInfo);
269 int32_t runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
270 ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
271 int32_t NumTeams, int32_t ThreadLimit,
272 uint64_t LoopTripCount, AsyncInfoTy &AsyncInfo);
273
274 /// Synchronize device/queue/event based on \p AsyncInfo and return
275 /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
276 int32_t synchronize(AsyncInfoTy &AsyncInfo);
277
278 /// Calls the corresponding print in the \p RTLDEVID
279 /// device RTL to obtain the information of the specific device.
280 bool printDeviceInfo(int32_t RTLDevID);
281
282 private:
283 // Call to RTL
284 void init(); // To be called only via DeviceTy::initOnce()
285 };
286
287 /// Map between Device ID (i.e. openmp device id) and its DeviceTy.
288 typedef std::vector<DeviceTy> DevicesTy;
289
290 extern bool device_is_ready(int device_num);
291
292 /// Struct for the data required to handle plugins
293 struct PluginManager {
294 /// RTLs identified on the host
295 RTLsTy RTLs;
296
297 /// Devices associated with RTLs
298 DevicesTy Devices;
299 std::mutex RTLsMtx; ///< For RTLs and Devices
300
301 /// Translation table retreived from the binary
302 HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
303 std::mutex TrlTblMtx; ///< For Translation Table
304 /// Host offload entries in order of image registration
305 std::vector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
306
307 /// Map from ptrs on the host to an entry in the Translation Table
308 HostPtrToTableMapTy HostPtrToTableMap;
309 std::mutex TblMapMtx; ///< For HostPtrToTableMap
310
311 // Store target policy (disabled, mandatory, default)
312 kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
313 std::mutex TargetOffloadMtx; ///< For TargetOffloadPolicy
314 };
315
316 extern PluginManager *PM;
317
318 #endif
319