1 //===----------- MemoryManager.cpp - Target independent memory manager ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing target memory.
10 // It is very expensive to call alloc/free functions of target devices. The
11 // MemoryManagerTy in this file is to reduce the number of invocations of those
12 // functions by buffering allocated device memory. In this way, when a memory is
13 // not used, it will not be freed on the device directly. The buffer is
14 // organized in a number of buckets for efficient look up. A memory will go to
15 // corresponding bucket based on its size. When a new memory request comes in,
16 // it will first check whether there is free memory of same size. If yes,
17 // returns it directly. Otherwise, allocate one on device.
18 //
19 // It also provides a way to opt out the memory manager. Memory
20 // allocation/deallocation will only be managed if the requested size is less
21 // than SizeThreshold, which can be configured via an environment variable
22 // LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MemoryManager.h"
27 #include "device.h"
28 #include "private.h"
29 #include "rtl.h"
30 
31 namespace {
32 constexpr const size_t BucketSize[] = {
33     0,       1U << 2, 1U << 3,  1U << 4,  1U << 5,  1U << 6, 1U << 7,
34     1U << 8, 1U << 9, 1U << 10, 1U << 11, 1U << 12, 1U << 13};
35 
36 constexpr const int NumBuckets = sizeof(BucketSize) / sizeof(BucketSize[0]);
37 
38 /// The threshold to manage memory using memory manager. If the request size is
39 /// larger than \p SizeThreshold, the allocation will not be managed by the
40 /// memory manager. This variable can be configured via an env \p
41 /// LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD. By default, the value is 8KB.
42 size_t SizeThreshold = 1U << 13;
43 
44 /// Find the previous number that is power of 2 given a number that is not power
45 /// of 2.
floorToPowerOfTwo(size_t Num)46 size_t floorToPowerOfTwo(size_t Num) {
47   Num |= Num >> 1;
48   Num |= Num >> 2;
49   Num |= Num >> 4;
50   Num |= Num >> 8;
51   Num |= Num >> 16;
52 #if INTPTR_MAX == INT64_MAX
53   Num |= Num >> 32;
54 #elif INTPTR_MAX == INT32_MAX
55   // Do nothing with 32-bit
56 #else
57 #error Unsupported architecture
58 #endif
59   Num += 1;
60   return Num >> 1;
61 }
62 
63 /// Find a suitable bucket
findBucket(size_t Size)64 int findBucket(size_t Size) {
65   const size_t F = floorToPowerOfTwo(Size);
66 
67   DP("findBucket: Size %zu is floored to %zu.\n", Size, F);
68 
69   int L = 0, H = NumBuckets - 1;
70   while (H - L > 1) {
71     int M = (L + H) >> 1;
72     if (BucketSize[M] == F)
73       return M;
74     if (BucketSize[M] > F)
75       H = M - 1;
76     else
77       L = M;
78   }
79 
80   assert(L >= 0 && L < NumBuckets && "L is out of range");
81 
82   DP("findBucket: Size %zu goes to bucket %d\n", Size, L);
83 
84   return L;
85 }
86 } // namespace
87 
MemoryManagerTy(DeviceTy & Dev,size_t Threshold)88 MemoryManagerTy::MemoryManagerTy(DeviceTy &Dev, size_t Threshold)
89     : FreeLists(NumBuckets), FreeListLocks(NumBuckets), Device(Dev) {
90   if (Threshold)
91     SizeThreshold = Threshold;
92 }
93 
~MemoryManagerTy()94 MemoryManagerTy::~MemoryManagerTy() {
95   // TODO: There is a little issue that target plugin is destroyed before this
96   // object, therefore the memory free will not succeed.
97   // Deallocate all memory in map
98   for (auto Itr = PtrToNodeTable.begin(); Itr != PtrToNodeTable.end(); ++Itr) {
99     assert(Itr->second.Ptr && "nullptr in map table");
100     deleteOnDevice(Itr->second.Ptr);
101   }
102 }
103 
allocateOnDevice(size_t Size,void * HstPtr) const104 void *MemoryManagerTy::allocateOnDevice(size_t Size, void *HstPtr) const {
105   return Device.RTL->data_alloc(Device.RTLDeviceID, Size, HstPtr);
106 }
107 
deleteOnDevice(void * Ptr) const108 int MemoryManagerTy::deleteOnDevice(void *Ptr) const {
109   return Device.RTL->data_delete(Device.RTLDeviceID, Ptr);
110 }
111 
freeAndAllocate(size_t Size,void * HstPtr)112 void *MemoryManagerTy::freeAndAllocate(size_t Size, void *HstPtr) {
113   std::vector<void *> RemoveList;
114 
115   // Deallocate all memory in FreeList
116   for (int I = 0; I < NumBuckets; ++I) {
117     FreeListTy &List = FreeLists[I];
118     std::lock_guard<std::mutex> Lock(FreeListLocks[I]);
119     if (List.empty())
120       continue;
121     for (const NodeTy &N : List) {
122       deleteOnDevice(N.Ptr);
123       RemoveList.push_back(N.Ptr);
124     }
125     FreeLists[I].clear();
126   }
127 
128   // Remove all nodes in the map table which have been released
129   if (!RemoveList.empty()) {
130     std::lock_guard<std::mutex> LG(MapTableLock);
131     for (void *P : RemoveList)
132       PtrToNodeTable.erase(P);
133   }
134 
135   // Try allocate memory again
136   return allocateOnDevice(Size, HstPtr);
137 }
138 
allocateOrFreeAndAllocateOnDevice(size_t Size,void * HstPtr)139 void *MemoryManagerTy::allocateOrFreeAndAllocateOnDevice(size_t Size,
140                                                          void *HstPtr) {
141   void *TgtPtr = allocateOnDevice(Size, HstPtr);
142   // We cannot get memory from the device. It might be due to OOM. Let's
143   // free all memory in FreeLists and try again.
144   if (TgtPtr == nullptr) {
145     DP("Failed to get memory on device. Free all memory in FreeLists and "
146        "try again.\n");
147     TgtPtr = freeAndAllocate(Size, HstPtr);
148   }
149 
150 #ifdef OMPTARGET_DEBUG
151   if (TgtPtr == nullptr)
152     DP("Still cannot get memory on device probably because the device is "
153        "OOM.\n");
154 #endif
155 
156   return TgtPtr;
157 }
158 
allocate(size_t Size,void * HstPtr)159 void *MemoryManagerTy::allocate(size_t Size, void *HstPtr) {
160   // If the size is zero, we will not bother the target device. Just return
161   // nullptr directly.
162   if (Size == 0)
163     return nullptr;
164 
165   DP("MemoryManagerTy::allocate: size %zu with host pointer " DPxMOD ".\n",
166      Size, DPxPTR(HstPtr));
167 
168   // If the size is greater than the threshold, allocate it directly from
169   // device.
170   if (Size > SizeThreshold) {
171     DP("%zu is greater than the threshold %zu. Allocate it directly from "
172        "device\n",
173        Size, SizeThreshold);
174     void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
175 
176     DP("Got target pointer " DPxMOD ". Return directly.\n", DPxPTR(TgtPtr));
177 
178     return TgtPtr;
179   }
180 
181   NodeTy *NodePtr = nullptr;
182 
183   // Try to get a node from FreeList
184   {
185     const int B = findBucket(Size);
186     FreeListTy &List = FreeLists[B];
187 
188     NodeTy TempNode(Size, nullptr);
189     std::lock_guard<std::mutex> LG(FreeListLocks[B]);
190     FreeListTy::const_iterator Itr = List.find(TempNode);
191 
192     if (Itr != List.end()) {
193       NodePtr = &Itr->get();
194       List.erase(Itr);
195     }
196   }
197 
198 #ifdef OMPTARGET_DEBUG
199   if (NodePtr != nullptr)
200     DP("Find one node " DPxMOD " in the bucket.\n", DPxPTR(NodePtr));
201 #endif
202 
203   // We cannot find a valid node in FreeLists. Let's allocate on device and
204   // create a node for it.
205   if (NodePtr == nullptr) {
206     DP("Cannot find a node in the FreeLists. Allocate on device.\n");
207     // Allocate one on device
208     void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
209 
210     if (TgtPtr == nullptr)
211       return nullptr;
212 
213     // Create a new node and add it into the map table
214     {
215       std::lock_guard<std::mutex> Guard(MapTableLock);
216       auto Itr = PtrToNodeTable.emplace(TgtPtr, NodeTy(Size, TgtPtr));
217       NodePtr = &Itr.first->second;
218     }
219 
220     DP("Node address " DPxMOD ", target pointer " DPxMOD ", size %zu\n",
221        DPxPTR(NodePtr), DPxPTR(TgtPtr), Size);
222   }
223 
224   assert(NodePtr && "NodePtr should not be nullptr at this point");
225 
226   return NodePtr->Ptr;
227 }
228 
free(void * TgtPtr)229 int MemoryManagerTy::free(void *TgtPtr) {
230   DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr));
231 
232   NodeTy *P = nullptr;
233 
234   // Look it up into the table
235   {
236     std::lock_guard<std::mutex> G(MapTableLock);
237     auto Itr = PtrToNodeTable.find(TgtPtr);
238 
239     // We don't remove the node from the map table because the map does not
240     // change.
241     if (Itr != PtrToNodeTable.end())
242       P = &Itr->second;
243   }
244 
245   // The memory is not managed by the manager
246   if (P == nullptr) {
247     DP("Cannot find its node. Delete it on device directly.\n");
248     return deleteOnDevice(TgtPtr);
249   }
250 
251   // Insert the node to the free list
252   const int B = findBucket(P->Size);
253 
254   DP("Found its node " DPxMOD ". Insert it to bucket %d.\n", DPxPTR(P), B);
255 
256   {
257     std::lock_guard<std::mutex> G(FreeListLocks[B]);
258     FreeLists[B].insert(*P);
259   }
260 
261   return OFFLOAD_SUCCESS;
262 }
263