/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
    #error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_backend_H
#define __TBB_backend_H

// Included from namespace rml::internal

// Global state of blocks currently being processed
class BackendSync {
    // Class instances should reside in zero-initialized memory!
    // The number of blocks currently removed from a bin and not yet returned to another bin
    std::atomic<intptr_t> inFlyBlocks;
    std::atomic<intptr_t> binsModifications;  // incremented on every bin modification
    Backend *backend;
public:
    void init(Backend *b) { backend = b; }
    void blockConsumed() { inFlyBlocks++; }
    void binsModified() { binsModifications++; }
    void blockReleased() {
#if __TBB_MALLOC_BACKEND_STAT
        MALLOC_ITT_SYNC_RELEASING(&inFlyBlocks);
#endif
        binsModifications++;
        intptr_t prev = inFlyBlocks.fetch_sub(1);
        MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
        suppress_unused_warning(prev);
    }
    intptr_t getNumOfMods() const { return binsModifications.load(std::memory_order_acquire); }
    // Returns true if the block search needs to be redone
    inline bool waitTillBlockReleased(intptr_t startModifiedCnt);
};
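// Intended usage (a sketch inferred from the interface above): callers invoke
// blockConsumed() when they take a block out of a bin and blockReleased() when
// they return one; a thread whose bin scan failed can save getNumOfMods() as
// startModifiedCnt and later call waitTillBlockReleased(startModifiedCnt) —
// a true result means the bins have changed and the search should be redone.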

class CoalRequestQ { // queue of free blocks whose coalescing was delayed
private:
    std::atomic<FreeBlock*> blocksToFree;
    BackendSync *bkndSync;
    // counts the blocks that are in blocksToFree plus those that have left
    // blocksToFree but are still being actively coalesced
    std::atomic<intptr_t> inFlyBlocks;
public:
    void init(BackendSync *bSync) { bkndSync = bSync; }
    FreeBlock *getAll(); // return the current list of blocks and make the queue empty
    void putBlock(FreeBlock *fBlock);
    inline void blockWasProcessed();
    intptr_t blocksInFly() const { return inFlyBlocks.load(std::memory_order_acquire); }
};
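// A sketch of the intended flow, inferred from the interface: a thread that has
// to postpone coalescing calls putBlock() to queue the free block; the thread
// that drains the queue calls getAll(), coalesces each block, and then reports
// completion via blockWasProcessed(), so blocksInFly() reflects pending work.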

class MemExtendingSema {
    std::atomic<intptr_t>    active;
public:
    bool wait() {
        bool rescanBins = false;
        // up to 3 threads can add more memory from the OS simultaneously,
        // the rest of the threads have to wait
        intptr_t prevCnt = active.load(std::memory_order_acquire);
        for (;;) {
            if (prevCnt < 3) {
                if (active.compare_exchange_strong(prevCnt, prevCnt + 1)) {
                    break;
                }
            } else {
                SpinWaitWhileEq(active, prevCnt);
                rescanBins = true;
                break;
            }
        }
        return rescanBins;
    }
    void signal() { active.fetch_sub(1); }
};
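// Typical usage (sketch): a thread calls wait() before requesting more memory
// from the OS and signal() when it is done; a true result from wait() means the
// thread had to block behind other allocators, so memory added in the meantime
// may already be in the bins and the caller should rescan them first.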

enum MemRegionType {
    // The region holds only slabs
    MEMREG_SLAB_BLOCKS = 0,
    // The region can hold several large object blocks
    MEMREG_LARGE_BLOCKS,
    // The region holds only one block with a requested size
    MEMREG_ONE_BLOCK
};

class MemRegionList {
    MallocMutex regionListLock;
public:
    MemRegion  *head;
    void add(MemRegion *r);
    void remove(MemRegion *r);
    int reportStat(FILE *f);
};

class Backend {
private:
/* Blocks in the range [minBinnedSize; getMaxBinnedSize()] are kept in bins,
   and one region can contain several blocks. Larger blocks are allocated directly,
   and one region always contains one block.
*/
    enum {
        minBinnedSize = 8*1024UL,
        /*   If huge pages are available, maxBinned_HugePage is used.
             If not, maxBinned_SmallPage is the threshold.
             TODO: use pool's granularity for upper bound setting.*/
        maxBinned_SmallPage = 1024*1024UL,
        // TODO: support other page sizes
        maxBinned_HugePage = 4*1024*1024UL
    };
    enum {
        VALID_BLOCK_IN_BIN = 1 // a valid block added to a bin, not returned as a result
    };
public:
    // Backend bins step is the same as CacheStep for the large object cache
    static const size_t   freeBinsStep = LargeObjectCache::LargeBSProps::CacheStep;
    static const unsigned freeBinsNum = (maxBinned_HugePage-minBinnedSize)/freeBinsStep + 1;

    // if the previous access missed the per-thread slab pool,
    // allocate numOfSlabAllocOnMiss blocks in advance
    static const int numOfSlabAllocOnMiss = 2;

    enum {
        NO_BIN = -1,
        // special bin for blocks >= maxBinned_HugePage; blocks go to this bin
        // when a pool is created with the keepAllMemory policy
        // TODO: currently this bin is scanned with "1st fit"; as it accumulates
        // blocks of different sizes, "best fit" is preferable in terms of fragmentation
        HUGE_BIN = freeBinsNum-1
    };

    // Bin keeps a doubly-linked list of free blocks. It must be doubly-linked
    // because during coalescing a block is removed from the middle of the list.
    struct Bin {
        std::atomic<FreeBlock*> head;
        FreeBlock*              tail;
        MallocMutex             tLock;

        void removeBlock(FreeBlock *fBlock);
        void reset() {
            head.store(nullptr, std::memory_order_relaxed);
            tail = nullptr;
        }
        bool empty() const { return !head.load(std::memory_order_relaxed); }

        size_t countFreeBlocks();
        size_t reportFreeBlocks(FILE *f);
        void reportStat(FILE *f);
    };

    typedef BitMaskMin<Backend::freeBinsNum> BitMaskBins;

    // array of bins supplemented with a bitmask for fast finding of non-empty bins
    class IndexedBins {
        BitMaskBins bitMask;
        Bin         freeBins[Backend::freeBinsNum];
        FreeBlock *getFromBin(int binIdx, BackendSync *sync, size_t size,
                bool needAlignedBlock, bool alignedBin, bool wait, int *resLocked);
    public:
        FreeBlock *findBlock(int nativeBin, BackendSync *sync, size_t size,
                bool needAlignedBlock, bool alignedBin, int *numOfLockedBins);
        bool tryReleaseRegions(int binIdx, Backend *backend);
        void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
        void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
        bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
        int  getMinNonemptyBin(unsigned startBin) const;
        void verify();
        void reset();
        void reportStat(FILE *f);
    };

private:
    class AdvRegionsBins {
        BitMaskBins bins;
    public:
        void registerBin(int regBin) { bins.set(regBin, 1); }
        int getMinUsedBin(int start) const { return bins.getMinTrue(start); }
        void reset() { bins.reset(); }
    };
    // auxiliary class for atomically tracking the maximum request size
    class MaxRequestComparator {
        const Backend *backend;
    public:
        MaxRequestComparator(const Backend *be) : backend(be) {}
        inline bool operator()(size_t oldMaxReq, size_t requestSize) const;
    };

#if CHECK_ALLOCATION_RANGE
    // Keep the min and max of all addresses requested from the OS;
    // use them for checking memory possibly allocated by replaced allocators
    // and for debugging purposes. Valid only for the default memory pool.
    class UsedAddressRange {
        static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX;

        std::atomic<uintptr_t> leftBound,
                               rightBound;
        MallocMutex mutex;
    public:
        // rightBound is zero-initialized
        void init() { leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed); }
        void registerAlloc(uintptr_t left, uintptr_t right);
        void registerFree(uintptr_t left, uintptr_t right);
        // As only the left and right bounds are kept, we can return true
        // for a pointer not allocated by us if more than a single region
        // was requested from the OS.
        bool inRange(void *ptr) const {
            const uintptr_t p = (uintptr_t)ptr;
            return leftBound.load(std::memory_order_relaxed)<=p &&
                   p<=rightBound.load(std::memory_order_relaxed);
        }
    };
#else
    class UsedAddressRange {
    public:
        void init() { }
        void registerAlloc(uintptr_t, uintptr_t) {}
        void registerFree(uintptr_t, uintptr_t) {}
        bool inRange(void *) const { return true; }
    };
#endif

    ExtMemoryPool   *extMemPool;
    // used to release every region when the pool is destroyed
    MemRegionList    regionList;

    CoalRequestQ     coalescQ; // queue of coalescing requests
    BackendSync      bkndSync;
    // semaphore protecting the addition of more memory from the OS
    MemExtendingSema memExtendingSema;
    //size_t           totalMemSize,
    //                 memSoftLimit;
    std::atomic<size_t> totalMemSize;
    std::atomic<size_t> memSoftLimit;
    UsedAddressRange usedAddrRange;
    // to keep the 1st allocation larger than requested, keep the bootstrapping status
    enum {
        bootsrapMemNotDone = 0,
        bootsrapMemInitializing,
        bootsrapMemDone
    };
    std::atomic<intptr_t> bootsrapMemStatus;
    MallocMutex      bootsrapMemStatusMutex;

    // Using the maximal observed requested size allows decreasing
    // memory consumption for small requests and decreasing fragmentation
    // for workloads where small and large allocation requests are mixed.
    // TODO: decrease it as well, not only increase it
    std::atomic<size_t> maxRequestedSize;

    // register bins related to advance regions
    AdvRegionsBins advRegBins;
    // Storage for split FreeBlocks
    IndexedBins freeLargeBlockBins,
                freeSlabAlignedBins;

    // Our friends
    friend class BackendSync;

    /******************************** Backend methods ******************************/

    /*--------------------------- Coalescing functions ----------------------------*/
    void coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned);
    bool coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool reportBlocksProcessed);

    // Main coalescing operation
    FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion);

    // Queue for conflicted blocks during coalescing
    bool scanCoalescQ(bool forceCoalescQDrop);
    intptr_t blocksInCoalescing() const { return coalescQ.blocksInFly(); }

    /*--------------------- FreeBlock backend accessors ---------------------------*/
    FreeBlock *genericGetBlock(int num, size_t size, bool slabAligned);
    void genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned);

    // Split the block and return the remaining parts to the backend if possible
    FreeBlock *splitBlock(FreeBlock *fBlock, int num, size_t size, bool isAligned, bool needAlignedBlock);

    void removeBlockFromBin(FreeBlock *fBlock);

    // TODO: combine with returnLargeObject
    void putLargeBlock(LargeMemoryBlock *lmb);

    /*------------------- Starting point for OS allocation ------------------------*/
    void requestBootstrapMem();
    FreeBlock *askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt,
                            int *lockedBinsThreshold, int numOfLockedBins,
                            bool *splittable, bool needSlabRegion);

    /*---------------------- Memory regions allocation ----------------------------*/
    FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin);
    void releaseRegion(MemRegion *region);

    // TODO: combine into one initMemoryRegion function
    FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize);
    void startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin);

    /*------------------------- Raw memory accessors ------------------------------*/
    void *allocRawMem(size_t &size);
    bool freeRawMem(void *object, size_t size);

    /*------------------------------ Cleanup functions ----------------------------*/
    // Clean all memory from all caches (extMemPool hard cleanup)
    FreeBlock *releaseMemInCaches(intptr_t startModifiedCnt, int *lockedBinsThreshold, int numOfLockedBins);
    // Soft heap limit (regular cleanup, then maybe hard cleanup)
    void releaseCachesToLimit();

    /*---------------------------------- Utility ----------------------------------*/
    // TODO: move inside the IndexedBins class
    static int sizeToBin(size_t size) {
        if (size >= maxBinned_HugePage)
            return HUGE_BIN;
        else if (size < minBinnedSize)
            return NO_BIN;

        int bin = (size - minBinnedSize)/freeBinsStep;

        MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size.");
        return bin;
    }
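    // Example mapping (assuming freeBinsStep is 8K): an 8K request falls into
    // bin 0, while 16K and 20K both fall into bin 1 (integer division); anything
    // below minBinnedSize yields NO_BIN, and sizes >= maxBinned_HugePage go to
    // HUGE_BIN.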
    static bool toAlignedBin(FreeBlock *block, size_t size) {
        return isAligned((char*)block + size, slabSize) && size >= slabSize;
    }

public:
    /*--------------------- Init, reset, destroy, verify  -------------------------*/
    void init(ExtMemoryPool *extMemoryPool);
    bool destroy();

    void verify();
    void reset();
    bool clean(); // clean on caches cleanup

    /*------------------------- Slab block request --------------------------------*/
    BlockI *getSlabBlock(int num);
    void putSlabBlock(BlockI *block);

    /*-------------------------- Large object request -----------------------------*/
    LargeMemoryBlock *getLargeBlock(size_t size);
    // TODO: make consistent with getLargeBlock
    void returnLargeObject(LargeMemoryBlock *lmb);

    /*-------------------------- Backreference memory request ----------------------*/
    void *getBackRefSpace(size_t size, bool *rawMemUsed);
    void putBackRefSpace(void *b, size_t size, bool rawMemUsed);

    /*----------------------------- Remap object ----------------------------------*/
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);

    /*---------------------------- Validation -------------------------------------*/
    bool inUserPool() const;
    bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }

    /*-------------------------- Configuration API --------------------------------*/
    // Soft heap limit
    void setRecommendedMaxSize(size_t softLimit) {
        memSoftLimit = softLimit;
        releaseCachesToLimit();
    }

    /*------------------------------- Info ----------------------------------------*/
    size_t getMaxBinnedSize() const;

    /*-------------------------- Testing, statistics ------------------------------*/
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getTotalMemSize() const { return totalMemSize.load(std::memory_order_relaxed); }
#endif
#if __TBB_MALLOC_BACKEND_STAT
    void reportStat(FILE *f);
private:
    static size_t binToSize(int bin) {
        MALLOC_ASSERT(bin <= HUGE_BIN, "Invalid bin.");

        return bin*freeBinsStep + minBinnedSize;
    }
#endif
};
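// A rough usage sketch, inferred from the declarations above: a memory pool
// calls init() once with its ExtMemoryPool, then serves slab requests through
// getSlabBlock()/putSlabBlock() and large objects through
// getLargeBlock()/returnLargeObject(); destroy() releases every region the
// backend obtained from the OS, and clean() is invoked during cache cleanup.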

#endif // __TBB_backend_H