1 //===----------- device.h - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
//
9 // Declarations for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef _OMPTARGET_DEVICE_H
14 #define _OMPTARGET_DEVICE_H
15 
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <vector>

#include "omptarget.h"
#include "rtl.h"
27 
// Forward declarations of types that this header only refers to through
// pointers.
struct RTLInfoTy;
struct __tgt_bin_desc;
struct __tgt_target_table;

/// Opaque pointer type; in this header it carries the optional source name of
/// a mapped variable (see HostDataToTargetTy::HstPtrName).
using map_var_info_t = void *;
34 
/// Enum for the OMP_TARGET_OFFLOAD policy.
/// NOTE: keep the enumerators and their values in sync with the definition in
/// kmp.h.
enum kmp_target_offload_kind {
  tgt_disabled = 0,
  tgt_default = 1,
  tgt_mandatory = 2
};
typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
42 
43 /// Map between host data and target data.
44 struct HostDataToTargetTy {
45   uintptr_t HstPtrBase; // host info.
46   uintptr_t HstPtrBegin;
47   uintptr_t HstPtrEnd;       // non-inclusive.
48   map_var_info_t HstPtrName; // Optional source name of mapped variable.
49 
50   uintptr_t TgtPtrBegin; // target info.
51 
52 private:
53   /// use mutable to allow modification via std::set iterator which is const.
54   mutable uint64_t RefCount;
55   static const uint64_t INFRefCount = ~(uint64_t)0;
56   /// This mutex will be locked when data movement is issued. For targets that
57   /// doesn't support async data movement, this mutex can guarantee that after
58   /// it is released, memory region on the target is update to date. For targets
59   /// that support async data movement, this can guarantee that data movement
60   /// has been issued. This mutex *must* be locked right before releasing the
61   /// mapping table lock.
62   std::shared_ptr<std::mutex> UpdateMtx;
63 
64 public:
65   HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB,
66                      map_var_info_t Name = nullptr, bool IsINF = false)
67       : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name),
68         TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1),
69         UpdateMtx(std::make_shared<std::mutex>()) {}
70 
71   uint64_t getRefCount() const { return RefCount; }
72 
73   uint64_t resetRefCount() const {
74     if (RefCount != INFRefCount)
75       RefCount = 1;
76 
77     return RefCount;
78   }
79 
80   uint64_t incRefCount() const {
81     if (RefCount != INFRefCount) {
82       ++RefCount;
83       assert(RefCount < INFRefCount && "refcount overflow");
84     }
85 
86     return RefCount;
87   }
88 
89   uint64_t decRefCount() const {
90     if (RefCount != INFRefCount) {
91       assert(RefCount > 0 && "refcount underflow");
92       --RefCount;
93     }
94 
95     return RefCount;
96   }
97 
98   bool isRefCountInf() const { return RefCount == INFRefCount; }
99 
100   std::string refCountToStr() const {
101     return isRefCountInf() ? "INF" : std::to_string(getRefCount());
102   }
103 
104   /// Should one decrement of the reference count (after resetting it if
105   /// \c AfterReset) remove this mapping?
106   bool decShouldRemove(bool AfterReset = false) const {
107     if (AfterReset)
108       return !isRefCountInf();
109     return getRefCount() == 1;
110   }
111 
112   void lock() const { UpdateMtx->lock(); }
113 
114   void unlock() const { UpdateMtx->unlock(); }
115 };
116 
117 typedef uintptr_t HstPtrBeginTy;
118 inline bool operator<(const HostDataToTargetTy &lhs, const HstPtrBeginTy &rhs) {
rrdtool_graph_merge_args_from_json($rrdtool_graph, $graph_config, $context, $size, $conf_rrds, $conf_graphreport_stats, $conf_graphreport_stat_items)119   return lhs.HstPtrBegin < rhs;
120 }
121 inline bool operator<(const HstPtrBeginTy &lhs, const HostDataToTargetTy &rhs) {
122   return lhs < rhs.HstPtrBegin;
123 }
124 inline bool operator<(const HostDataToTargetTy &lhs,
125                       const HostDataToTargetTy &rhs) {
126   return lhs.HstPtrBegin < rhs.HstPtrBegin;
127 }
128 
129 typedef std::set<HostDataToTargetTy, std::less<>> HostDataToTargetListTy;
130 
131 struct LookupResult {
132   struct {
133     unsigned IsContained : 1;
134     unsigned ExtendsBefore : 1;
135     unsigned ExtendsAfter : 1;
136   } Flags;
137 
138   HostDataToTargetListTy::iterator Entry;
139 
140   LookupResult() : Flags({0, 0, 0}), Entry() {}
141 };
142 
143 /// This struct will be returned by \p DeviceTy::getOrAllocTgtPtr which provides
144 /// more data than just a target pointer.
145 struct TargetPointerResultTy {
146   struct {
147     /// If the map table entry is just created
148     unsigned IsNewEntry : 1;
149     /// If the pointer is actually a host pointer (when unified memory enabled)
150     unsigned IsHostPointer : 1;
151   } Flags = {0, 0};
152 
153   /// The iterator to the corresponding map table entry
154   HostDataToTargetListTy::iterator MapTableEntry{};
155 
156   /// The corresponding target pointer
157   void *TargetPointer = nullptr;
158 };
159 
/// Value stored for one shadow pointer.
/// NOTE(review): the field meanings below are inferred from the names -
/// confirm against the code that fills the shadow pointer map.
struct ShadowPtrValTy {
  void *HstPtrVal;  // presumably the pointer value on the host
  void *TgtPtrAddr; // presumably where the pointer is stored on the target
  void *TgtPtrVal;  // presumably the pointer value on the target
};
/// Map for shadow pointers, keyed by a void * (presumably the host address of
/// the pointer - TODO confirm).
using ShadowPtrListTy = std::map<void *, ShadowPtrValTy>;
167 
168 ///
169 struct PendingCtorDtorListsTy {
170   std::list<void *> PendingCtors;
171   std::list<void *> PendingDtors;
172 };
173 typedef std::map<__tgt_bin_desc *, PendingCtorDtorListsTy>
174     PendingCtorsDtorsPerLibrary;
175 
/// Three-valued state passed as \p MoveData to DeviceTy::getTargetPointer.
/// NOTE(review): the meaning of the enumerators is inferred from their names -
/// confirm against the callers.
enum class MoveDataStateTy : uint32_t {
  REQUIRED, // = 0
  NONE,     // = 1
  UNKNOWN   // = 2
};
177 
178 struct DeviceTy {
179   int32_t DeviceID;
180   RTLInfoTy *RTL;
181   int32_t RTLDeviceID;
182 
183   bool IsInit;
184   std::once_flag InitFlag;
185   bool HasPendingGlobals;
186 
187   HostDataToTargetListTy HostDataToTargetMap;
188   PendingCtorsDtorsPerLibrary PendingCtorsDtors;
189 
190   ShadowPtrListTy ShadowPtrMap;
191 
192   std::mutex DataMapMtx, PendingGlobalsMtx, ShadowMtx;
193 
194   // NOTE: Once libomp gains full target-task support, this state should be
195   // moved into the target task in libomp.
196   std::map<int32_t, uint64_t> LoopTripCnt;
197 
198   DeviceTy(RTLInfoTy *RTL);
199 
200   // The existence of mutexes makes DeviceTy non-copyable. We need to
201   // provide a copy constructor and an assignment operator explicitly.
202   DeviceTy(const DeviceTy &D);
203 
204   DeviceTy &operator=(const DeviceTy &D);
205 
206   ~DeviceTy();
207 
208   // Return true if data can be copied to DstDevice directly
209   bool isDataExchangable(const DeviceTy &DstDevice);
210 
211   LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
212   /// Get the target pointer based on host pointer begin and base. If the
213   /// mapping already exists, the target pointer will be returned directly. In
214   /// addition, if \p MoveData is true, the memory region pointed by \p
215   /// HstPtrBegin of size \p Size will also be transferred to the device. If the
216   /// mapping doesn't exist, and if unified memory is not enabled, a new mapping
217   /// will be created and the data will also be transferred accordingly. nullptr
218   /// will be returned because of any of following reasons:
219   /// - Data allocation failed;
220   /// - The user tried to do an illegal mapping;
221   /// - Data transfer issue fails.
222   TargetPointerResultTy
223   getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
224                    map_var_info_t HstPtrName, MoveDataStateTy MoveData,
225                    bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
226                    bool HasPresentModifier, AsyncInfoTy &AsyncInfo);
227   void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
228   void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
229                        bool UpdateRefCount, bool &IsHostPtr,
230                        bool MustContain = false, bool ForceDelete = false);
231   int deallocTgtPtr(void *TgtPtrBegin, int64_t Size,
232                     bool HasCloseModifier = false);
233   int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
234   int disassociatePtr(void *HstPtrBegin);
235 
236   // calls to RTL
237   int32_t initOnce();
build_aggregate_graph_config_from_url($conf_graph_colors)238   __tgt_target_table *load_binary(void *Img);
239 
240   // device memory allocation/deallocation routines
241   /// Allocates \p Size bytes on the device, host or shared memory space
242   /// (depending on \p Kind) and returns the address/nullptr when
243   /// succeeds/fails. \p HstPtr is an address of the host data which the
244   /// allocated target data will be associated with. If it is unknown, the
245   /// default value of \p HstPtr is nullptr. Note: this function doesn't do
246   /// pointer association. Actually, all the __tgt_rtl_data_alloc
247   /// implementations ignore \p HstPtr. \p Kind dictates what allocator should
248   /// be used (host, shared, device).
249   void *allocData(int64_t Size, void *HstPtr = nullptr,
250                   int32_t Kind = TARGET_ALLOC_DEFAULT);
251   /// Deallocates memory which \p TgtPtrBegin points at and returns
252   /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
253   int32_t deleteData(void *TgtPtrBegin);
254 
255   // Data transfer. When AsyncInfo is nullptr, the transfer will be
256   // synchronous.
257   // Copy data from host to device
258   int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
259                      AsyncInfoTy &AsyncInfo);
260   // Copy data from device back to host
261   int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
262                        AsyncInfoTy &AsyncInfo);
263   // Copy data from current device to destination device directly
264   int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
265                        int64_t Size, AsyncInfoTy &AsyncInfo);
266 
267   int32_t runRegion(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets,
268                     int32_t TgtVarsSize, AsyncInfoTy &AsyncInfo);
269   int32_t runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
270                         ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
271                         int32_t NumTeams, int32_t ThreadLimit,
272                         uint64_t LoopTripCount, AsyncInfoTy &AsyncInfo);
273 
274   /// Synchronize device/queue/event based on \p AsyncInfo and return
275   /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
276   int32_t synchronize(AsyncInfoTy &AsyncInfo);
277 
278   /// Calls the corresponding print in the \p RTLDEVID
279   /// device RTL to obtain the information of the specific device.
280   bool printDeviceInfo(int32_t RTLDevID);
281 
282 private:
283   // Call to RTL
284   void init(); // To be called only via DeviceTy::initOnce()
285 };
286 
287 /// Map between Device ID (i.e. openmp device id) and its DeviceTy.
288 typedef std::vector<DeviceTy> DevicesTy;
289 
290 extern bool device_is_ready(int device_num);
291 
292 /// Struct for the data required to handle plugins
293 struct PluginManager {
294   /// RTLs identified on the host
295   RTLsTy RTLs;
296 
297   /// Devices associated with RTLs
298   DevicesTy Devices;
299   std::mutex RTLsMtx; ///< For RTLs and Devices
300 
301   /// Translation table retreived from the binary
302   HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
303   std::mutex TrlTblMtx; ///< For Translation Table
304   /// Host offload entries in order of image registration
305   std::vector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
306 
307   /// Map from ptrs on the host to an entry in the Translation Table
308   HostPtrToTableMapTy HostPtrToTableMap;
309   std::mutex TblMapMtx; ///< For HostPtrToTableMap
310 
311   // Store target policy (disabled, mandatory, default)
312   kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
313   std::mutex TargetOffloadMtx; ///< For TargetOffloadPolicy
314 };
315 
/// Global pointer to the plugin manager. This is only the declaration; the
/// definition lives in another translation unit.
extern PluginManager *PM;
317 
318 #endif
319