/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
    #error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_backend_H
#define __TBB_backend_H

// Included from namespace rml::internal

// global state of blocks currently in processing
class BackendSync {
    // Class instances should reside in zero-initialized memory!
    // The number of blocks currently removed from a bin and not yet returned to another one
    intptr_t inFlyBlocks;
    intptr_t binsModifications;   // incremented on every bin modification
    Backend *backend;
public:
    void init(Backend *b) { backend = b; }
    void blockConsumed() { AtomicIncrement(inFlyBlocks); }
    void binsModified() { AtomicIncrement(binsModifications); }
    void blockReleased() {
#if __TBB_MALLOC_BACKEND_STAT
        MALLOC_ITT_SYNC_RELEASING(&inFlyBlocks);
#endif
        AtomicIncrement(binsModifications);
        intptr_t prev = AtomicAdd(inFlyBlocks, -1);
        MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
        suppress_unused_warning(prev);
    }
    intptr_t getNumOfMods() const { return FencedLoad(binsModifications); }
    // returns true if the blocks search needs to be redone
    inline bool waitTillBlockReleased(intptr_t startModifiedCnt);
};
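
// A sketch of the intended BackendSync protocol (the call sites live in the
// backend implementation): a thread calls blockConsumed() when it takes a block
// out of a bin and blockReleased() once the block is back under backend control;
// any other bin change is reported via binsModified(). A thread that failed to
// find a block remembers getNumOfMods() before its search and later uses
// waitTillBlockReleased() to decide whether rescanning the bins is worthwhile.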

class CoalRequestQ { // queue of free blocks whose coalescing was delayed
private:
    FreeBlock   *blocksToFree;
    BackendSync *bkndSync;
    // counts blocks that are in blocksToFree plus those that already left
    // blocksToFree but are still being actively coalesced
    intptr_t     inFlyBlocks;
public:
    void init(BackendSync *bSync) { bkndSync = bSync; }
    FreeBlock *getAll(); // return the current list of blocks and make the queue empty
    void putBlock(FreeBlock *fBlock);
    inline void blockWasProcessed();
    intptr_t blocksInFly() const { return FencedLoad(inFlyBlocks); }
};
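
// A sketch of the expected CoalRequestQ flow (the exact call sites are in the
// backend implementation): a releaser that cannot finish coalescing right away
// puts the block into the queue via putBlock(); a later operation drains the
// queue with getAll() through Backend::scanCoalescQ(), finishes the coalescing,
// and reports each drained block via blockWasProcessed().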

class MemExtendingSema {
    intptr_t     active;
public:
    bool wait() {
        bool rescanBins = false;
        // up to 3 threads can add more memory from the OS simultaneously,
        // the rest of the threads have to wait
        for (;;) {
            intptr_t prevCnt = FencedLoad(active);
            if (prevCnt < 3) {
                intptr_t n = AtomicCompareExchange(active, prevCnt+1, prevCnt);
                if (n == prevCnt)
                    break;
            } else {
                SpinWaitWhileEq(active, prevCnt);
                rescanBins = true;
                break;
            }
        }
        return rescanBins;
    }
    void signal() { AtomicAdd(active, -1); }
};
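
// A minimal usage sketch for MemExtendingSema (assuming the caller is the one
// asking the OS for memory): pair wait() with signal() around the OS request,
// and rescan the free bins first if wait() returned true, because another
// thread may have added memory while this one was blocked:
//
//     if (memExtendingSema.wait()) {
//         /* rescan the free bins: another thread may have satisfied the request */
//     }
//     /* ... ask the OS for more memory ... */
//     memExtendingSema.signal();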

enum MemRegionType {
    // The region holds only slabs
    MEMREG_SLAB_BLOCKS = 0,
    // The region can hold several large object blocks
    MEMREG_LARGE_BLOCKS,
    // The region holds only one block of the requested size
    MEMREG_ONE_BLOCK
};

class MemRegionList {
    MallocMutex regionListLock;
public:
    MemRegion  *head;
    void add(MemRegion *r);
    void remove(MemRegion *r);
    int reportStat(FILE *f);
};

class Backend {
private:
/* Blocks in the range [minBinnedSize; getMaxBinnedSize()] are kept in bins,
   and one region can contain several blocks. Larger blocks are allocated directly
   and one region always contains one block.
*/
    enum {
        minBinnedSize = 8*1024UL,
        /*   If huge pages are available, maxBinned_HugePage is used.
             If not, maxBinned_SmallPage is the threshold.
             TODO: use the pool's granularity for setting the upper bound.*/
        maxBinned_SmallPage = 1024*1024UL,
        // TODO: support other page sizes
        maxBinned_HugePage = 4*1024*1024UL
    };
    enum {
        VALID_BLOCK_IN_BIN = 1 // valid block added to a bin, not returned as a result
    };
public:
    // The backend bin step is the same as the CacheStep of the large object cache
    static const size_t   freeBinsStep = LargeObjectCache::LargeBSProps::CacheStep;
    static const unsigned freeBinsNum = (maxBinned_HugePage-minBinnedSize)/freeBinsStep + 1;
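    // For example, if CacheStep were 8 KB, this would give
    // (4M - 8K)/8K + 1 == 512 bins covering [8 KB; 4 MB].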

    // if the previous access missed the per-thread slab pool,
    // allocate numOfSlabAllocOnMiss blocks in advance
    static const int numOfSlabAllocOnMiss = 2;

    enum {
        NO_BIN = -1,
        // special bin for blocks >= maxBinned_HugePage; blocks go to this bin
        // when the pool is created with the keepAllMemory policy
        // TODO: currently this bin is scanned with "first fit"; as it accumulates
        // blocks of different sizes, "best fit" would be preferable in terms of fragmentation
        HUGE_BIN = freeBinsNum-1
    };

    // Bin keeps a doubly-linked list of free blocks. It must be doubly-linked
    // because during coalescing a block is removed from the middle of the list.
    struct Bin {
        FreeBlock   *head,
                    *tail;
        MallocMutex  tLock;

        void removeBlock(FreeBlock *fBlock);
        void reset() { head = tail = 0; }
        bool empty() const { return !head; }

        size_t countFreeBlocks();
        size_t reportFreeBlocks(FILE *f);
        void reportStat(FILE *f);
    };

    typedef BitMaskMin<Backend::freeBinsNum> BitMaskBins;

    // array of bins supplemented with a bitmask for fast lookup of non-empty bins
    class IndexedBins {
        BitMaskBins bitMask;
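        // bit i is expected to mirror whether freeBins[i] is non-empty, so the
        // lowest suitable bin can be found with one bitmask scan instead of
        // probing every bin under its lock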
        Bin         freeBins[Backend::freeBinsNum];
        FreeBlock *getFromBin(int binIdx, BackendSync *sync, size_t size,
                bool needAlignedBlock, bool alignedBin, bool wait, int *resLocked);
    public:
        FreeBlock *findBlock(int nativeBin, BackendSync *sync, size_t size,
                bool needAlignedBlock, bool alignedBin, int *numOfLockedBins);
        bool tryReleaseRegions(int binIdx, Backend *backend);
        void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
        void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
        bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
        int  getMinNonemptyBin(unsigned startBin) const;
        void verify();
        void reset();
        void reportStat(FILE *f);
    };

private:
    class AdvRegionsBins {
        BitMaskBins bins;
    public:
        void registerBin(int regBin) { bins.set(regBin, 1); }
        int getMinUsedBin(int start) const { return bins.getMinTrue(start); }
        void reset() { bins.reset(); }
    };
    // auxiliary class for atomically tracking the maximum request size
    class MaxRequestComparator {
        const Backend *backend;
    public:
        MaxRequestComparator(const Backend *be) : backend(be) {}
        inline bool operator()(size_t oldMaxReq, size_t requestSize) const;
    };

#if CHECK_ALLOCATION_RANGE
    // Keeps the min and max of all addresses requested from the OS;
    // used for checking memory possibly allocated by replaced allocators
    // and for debugging purposes. Valid only for the default memory pool.
    class UsedAddressRange {
        static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX;

        uintptr_t   leftBound,
                    rightBound;
        MallocMutex mutex;
    public:
        // rightBound is zero-initialized
        void init() { leftBound = ADDRESS_UPPER_BOUND; }
        void registerAlloc(uintptr_t left, uintptr_t right);
        void registerFree(uintptr_t left, uintptr_t right);
        // as only the left and right bounds are kept, this can return true
        // for a pointer not allocated by us if more than a single region
        // was requested from the OS
        bool inRange(void *ptr) const {
            const uintptr_t p = (uintptr_t)ptr;
            return leftBound<=p && p<=rightBound;
        }
    };
#else
    class UsedAddressRange {
    public:
        void init() { }
        void registerAlloc(uintptr_t, uintptr_t) {}
        void registerFree(uintptr_t, uintptr_t) {}
        bool inRange(void *) const { return true; }
    };
#endif

    ExtMemoryPool   *extMemPool;
    // used to release every region when the pool is destroyed
    MemRegionList    regionList;

    CoalRequestQ     coalescQ; // queue of coalescing requests
    BackendSync      bkndSync;
    // semaphore limiting how many threads add more memory from the OS at once
    MemExtendingSema memExtendingSema;
    size_t           totalMemSize,
                     memSoftLimit;
    UsedAddressRange usedAddrRange;
    // to make the 1st allocation larger than requested, keep the bootstrapping status
    enum {
        bootsrapMemNotDone = 0,
        bootsrapMemInitializing,
        bootsrapMemDone
    };
    intptr_t         bootsrapMemStatus;
    MallocMutex      bootsrapMemStatusMutex;

    // Using the maximal observed requested size allows decreasing
    // memory consumption for small requests and decreasing fragmentation
    // for workloads where small and large allocation requests are mixed.
    // TODO: decrease it as well, not only increase it
    size_t           maxRequestedSize;

    // bins that correspond to regions allocated in advance
    AdvRegionsBins advRegBins;
    // Storage for split FreeBlocks
    IndexedBins freeLargeBlockBins,
                freeSlabAlignedBins;

    // Our friends
    friend class BackendSync;

    /******************************** Backend methods ******************************/

    /*--------------------------- Coalescing functions ----------------------------*/
    void coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned);
    bool coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool reportBlocksProcessed);

    // Main coalescing operation
    FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion);

    // Queue for conflicted blocks during coalescing
    bool scanCoalescQ(bool forceCoalescQDrop);
    intptr_t blocksInCoalescing() const { return coalescQ.blocksInFly(); }

    /*--------------------- FreeBlock backend accessors ---------------------------*/
    FreeBlock *genericGetBlock(int num, size_t size, bool slabAligned);
    void genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned);

    // Split the block and return the remaining parts to the backend if possible
    FreeBlock *splitBlock(FreeBlock *fBlock, int num, size_t size, bool isAligned, bool needAlignedBlock);

    void removeBlockFromBin(FreeBlock *fBlock);

    // TODO: combine with returnLargeObject
    void putLargeBlock(LargeMemoryBlock *lmb);

    /*------------------- Starting point for OS allocation ------------------------*/
    void requestBootstrapMem();
    FreeBlock *askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt,
                            int *lockedBinsThreshold, int numOfLockedBins,
                            bool *splittable, bool needSlabRegion);

    /*---------------------- Memory regions allocation ----------------------------*/
    FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin);
    void releaseRegion(MemRegion *region);

    // TODO: combine into one initMemoryRegion function
    FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize);
    void startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin);

    /*------------------------- Raw memory accessors ------------------------------*/
    void *allocRawMem(size_t &size);
    bool freeRawMem(void *object, size_t size);

    /*------------------------------ Cleanup functions ----------------------------*/
    // Clean all memory from all caches (extMemPool hard cleanup)
    FreeBlock *releaseMemInCaches(intptr_t startModifiedCnt, int *lockedBinsThreshold, int numOfLockedBins);
    // Soft heap limit (regular cleanup, then maybe hard cleanup)
    void releaseCachesToLimit();

    /*---------------------------------- Utility ----------------------------------*/
    // TODO: move inside the IndexedBins class
    static int sizeToBin(size_t size) {
        if (size >= maxBinned_HugePage)
            return HUGE_BIN;
        else if (size < minBinnedSize)
            return NO_BIN;

        int bin = (size - minBinnedSize)/freeBinsStep;

        MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size.");
        return bin;
    }
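    // For example, if freeBinsStep were 8 KB, a 24 KB block would map to bin
    // (24K - 8K)/8K == 2, while any size of at least maxBinned_HugePage maps
    // to HUGE_BIN regardless of the step.

    // True when the block ends on a slab boundary and is at least slabSize long;
    // such blocks go to the slab-aligned bins because a slab-aligned slab can be
    // carved from their tail.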
    static bool toAlignedBin(FreeBlock *block, size_t size) {
        return isAligned((char*)block + size, slabSize) && size >= slabSize;
    }

public:
    /*--------------------- Init, reset, destroy, verify  -------------------------*/
    void init(ExtMemoryPool *extMemoryPool);
    bool destroy();

    void verify();
    void reset();
    bool clean(); // clean on caches cleanup

    /*------------------------- Slab block request --------------------------------*/
    BlockI *getSlabBlock(int num);
    void putSlabBlock(BlockI *block);

    /*-------------------------- Large object request -----------------------------*/
    LargeMemoryBlock *getLargeBlock(size_t size);
    // TODO: make consistent with getLargeBlock
    void returnLargeObject(LargeMemoryBlock *lmb);

    /*-------------------------- Backreference memory request ----------------------*/
    void *getBackRefSpace(size_t size, bool *rawMemUsed);
    void putBackRefSpace(void *b, size_t size, bool rawMemUsed);

    /*----------------------------- Remap object ----------------------------------*/
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);

    /*---------------------------- Validation -------------------------------------*/
    bool inUserPool() const;
    bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }

    /*-------------------------- Configuration API --------------------------------*/
    // Soft heap limit
    void setRecommendedMaxSize(size_t softLimit) {
        memSoftLimit = softLimit;
        releaseCachesToLimit();
    }
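    // (Presumably reached through the public scalable_allocation_mode() call with
    // TBBMALLOC_SET_SOFT_HEAP_LIMIT; once the backend's total memory exceeds
    // memSoftLimit, releaseCachesToLimit() tries to shrink the caches back under it.)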

    /*------------------------------- Info ----------------------------------------*/
    size_t getMaxBinnedSize() const;

    /*-------------------------- Testing, statistics ------------------------------*/
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getTotalMemSize() const { return totalMemSize; }
#endif
#if __TBB_MALLOC_BACKEND_STAT
    void reportStat(FILE *f);
private:
    static size_t binToSize(int bin) {
        MALLOC_ASSERT(bin <= HUGE_BIN, "Invalid bin.");

        return bin*freeBinsStep + minBinnedSize;
    }
#endif
};

#endif // __TBB_backend_H