/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
#error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_backend_H
#define __TBB_backend_H

// Included from namespace rml::internal

// Global state of blocks currently being processed
class BackendSync {
    // Class instances should reside in zero-initialized memory!
    // The number of blocks currently removed from a bin and not yet returned
    // back to another bin
    std::atomic<intptr_t> inFlyBlocks;
    std::atomic<intptr_t> binsModifications; // incremented on every bin modification
    Backend *backend;
public:
    void init(Backend *b) { backend = b; }
    void blockConsumed() { inFlyBlocks++; }
    void binsModified() { binsModifications++; }
    void blockReleased() {
#if __TBB_MALLOC_BACKEND_STAT
        MALLOC_ITT_SYNC_RELEASING(&inFlyBlocks);
#endif
        binsModifications++;
        intptr_t prev = inFlyBlocks.fetch_sub(1);
        MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
        suppress_unused_warning(prev);
    }
    intptr_t getNumOfMods() const { return binsModifications.load(std::memory_order_acquire); }
    // returns true if the search over the bins needs to be redone
    inline bool waitTillBlockReleased(intptr_t startModifiedCnt);
};

class CoalRequestQ { // queue of free blocks whose coalescing was delayed
private:
    std::atomic<FreeBlock*> blocksToFree;
    BackendSync *bkndSync;
    // counts blocks that are in blocksToFree or have already left it
    // but are still being coalesced
    std::atomic<intptr_t> inFlyBlocks;
public:
    void init(BackendSync *bSync) { bkndSync = bSync; }
    FreeBlock *getAll(); // return the current list of blocks and make the queue empty
    void putBlock(FreeBlock *fBlock);
    inline void blockWasProcessed();
    intptr_t blocksInFly() const { return inFlyBlocks.load(std::memory_order_acquire); }
};

class MemExtendingSema {
    std::atomic<intptr_t> active;
public:
    bool wait() {
        bool rescanBins = false;
        // up to 3 threads can add more memory from the OS simultaneously,
        // the rest have to wait
        intptr_t prevCnt = active.load(std::memory_order_acquire);
        for (;;) {
            if (prevCnt < 3) {
                if (active.compare_exchange_strong(prevCnt, prevCnt + 1)) {
                    break;
                }
            } else {
                SpinWaitWhileEq(active, prevCnt);
                rescanBins = true;
                break;
            }
        }
        return rescanBins;
    }
    void signal() { active.fetch_sub(1); }
};

enum MemRegionType {
    // The region holds only slabs
    MEMREG_SLAB_BLOCKS = 0,
    // The region can hold several large object blocks
    MEMREG_LARGE_BLOCKS,
    // The region holds only one block with a requested size
    MEMREG_ONE_BLOCK
};

class MemRegionList {
    MallocMutex regionListLock;
public:
    MemRegion *head;
    void add(MemRegion *r);
    void remove(MemRegion *r);
    int reportStat(FILE *f);
};

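/* Illustrative sketch (not part of the interface): the retry pattern that
   BackendSync is designed to support when a scan over the free-block bins
   finds nothing. scanBins() is a hypothetical helper; the real logic lives
   in the Backend implementation.

       intptr_t startModifiedCnt = sync.getNumOfMods();
       FreeBlock *fb = scanBins(size);                   // hypothetical bin scan
       while (!fb && sync.waitTillBlockReleased(startModifiedCnt)) {
           // an in-fly block was returned to some bin after the scan started,
           // so repeating the search may succeed without asking the OS for memory
           startModifiedCnt = sync.getNumOfMods();
           fb = scanBins(size);
       }
*/
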
class Backend {
private:
    /* Blocks in the range [minBinnedSize; getMaxBinnedSize()] are kept in bins,
       and one region can contain several such blocks. Larger blocks are allocated
       directly, and one region always contains exactly one of them.
    */
    enum {
        minBinnedSize = 8*1024UL,
        /* If huge pages are available, maxBinned_HugePage is used as the threshold;
           otherwise maxBinned_SmallPage is the threshold.
           TODO: use the pool's granularity for setting the upper bound. */
        maxBinned_SmallPage = 1024*1024UL,
        // TODO: support other page sizes
        maxBinned_HugePage = 4*1024*1024UL
    };
    enum {
        VALID_BLOCK_IN_BIN = 1 // valid block added to a bin, not returned as a result
    };
public:
    // The backend bins step is the same as CacheStep for the large object cache
    static const size_t freeBinsStep = LargeObjectCache::LargeBSProps::CacheStep;
    static const unsigned freeBinsNum = (maxBinned_HugePage-minBinnedSize)/freeBinsStep + 1;

    // if the previous access missed the per-thread slab pool,
    // allocate numOfSlabAllocOnMiss blocks in advance
    static const int numOfSlabAllocOnMiss = 2;

    enum {
        NO_BIN = -1,
        // Special bin for blocks >= maxBinned_HugePage; blocks go to this bin
        // when a pool is created with the keepAllMemory policy.
        // TODO: currently this bin is scanned using "1st fit"; as it accumulates
        // blocks of different sizes, "best fit" is preferable in terms of fragmentation
        HUGE_BIN = freeBinsNum-1
    };

    // Bin keeps a doubly-linked list of free blocks. It must be doubly-linked
    // because during coalescing a block is removed from the middle of the list.
    struct Bin {
        std::atomic<FreeBlock*> head;
        FreeBlock              *tail;
        MallocMutex             tLock;

        void removeBlock(FreeBlock *fBlock);
        void reset() {
            head.store(nullptr, std::memory_order_relaxed);
            tail = nullptr;
        }
        bool empty() const { return !head.load(std::memory_order_relaxed); }

        size_t countFreeBlocks();
        size_t reportFreeBlocks(FILE *f);
        void reportStat(FILE *f);
    };

    typedef BitMaskMin<Backend::freeBinsNum> BitMaskBins;

    // Array of bins supplemented with a bitmask for fast lookup of non-empty bins
    class IndexedBins {
        BitMaskBins bitMask;
        Bin         freeBins[Backend::freeBinsNum];
        FreeBlock *getFromBin(int binIdx, BackendSync *sync, size_t size,
                              bool needAlignedBlock, bool alignedBin, bool wait, int *resLocked);
    public:
        FreeBlock *findBlock(int nativeBin, BackendSync *sync, size_t size,
                             bool needAlignedBlock, bool alignedBin, int *numOfLockedBins);
        bool tryReleaseRegions(int binIdx, Backend *backend);
        void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
        void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
        bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
        int  getMinNonemptyBin(unsigned startBin) const;
        void verify();
        void reset();
        void reportStat(FILE *f);
    };

private:
    class AdvRegionsBins {
        BitMaskBins bins;
    public:
        void registerBin(int regBin) { bins.set(regBin, 1); }
        int  getMinUsedBin(int start) const { return bins.getMinTrue(start); }
        void reset() { bins.reset(); }
    };
    // Auxiliary class for finding the maximal requested size atomically
    class MaxRequestComparator {
        const Backend *backend;
    public:
        MaxRequestComparator(const Backend *be) : backend(be) {}
        inline bool operator()(size_t oldMaxReq, size_t requestSize) const;
    };

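    /* Illustrative sketch (not part of the interface): how the bitmask inside
       IndexedBins is presumably used for a first-fit walk upwards from the
       natural bin of a request; tryTake() is a hypothetical stand-in for the
       locking logic of IndexedBins::getFromBin().

           for (int bin = bins.getMinNonemptyBin(sizeToBin(size));
                bin < (int)freeBinsNum;                  // termination convention simplified
                bin = bins.getMinNonemptyBin(bin + 1)) {
               if (FreeBlock *fb = tryTake(bin, size))   // hypothetical
                   return fb;
           }
    */
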
#if CHECK_ALLOCATION_RANGE
    // Keep the min and max of all addresses requested from the OS;
    // used for checking memory possibly allocated by replaced allocators
    // and for debugging purposes. Valid only for the default memory pool.
    class UsedAddressRange {
        static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX;

        std::atomic<uintptr_t> leftBound,
                               rightBound;
        MallocMutex mutex;
    public:
        // rightBound is zero-initialized
        void init() { leftBound.store(ADDRESS_UPPER_BOUND, std::memory_order_relaxed); }
        void registerAlloc(uintptr_t left, uintptr_t right);
        void registerFree(uintptr_t left, uintptr_t right);
        // As only the left and right bounds are kept, we may return true
        // for a pointer not allocated by us if more than a single region
        // was requested from the OS.
        bool inRange(void *ptr) const {
            const uintptr_t p = (uintptr_t)ptr;
            return leftBound.load(std::memory_order_relaxed) <= p &&
                   p <= rightBound.load(std::memory_order_relaxed);
        }
    };
#else
    class UsedAddressRange {
    public:
        void init() {}
        void registerAlloc(uintptr_t, uintptr_t) {}
        void registerFree(uintptr_t, uintptr_t) {}
        bool inRange(void *) const { return true; }
    };
#endif

    ExtMemoryPool *extMemPool;
    // used to release every region when the pool is destroyed
    MemRegionList regionList;

    CoalRequestQ coalescQ; // queue of coalescing requests
    BackendSync  bkndSync;
    // semaphore protecting the addition of more memory from the OS
    MemExtendingSema memExtendingSema;
    std::atomic<size_t> totalMemSize;
    std::atomic<size_t> memSoftLimit;
    UsedAddressRange usedAddrRange;
    // to keep the 1st allocation larger than requested, track the bootstrapping status
    enum {
        bootsrapMemNotDone = 0,
        bootsrapMemInitializing,
        bootsrapMemDone
    };
    std::atomic<intptr_t> bootsrapMemStatus;
    MallocMutex bootsrapMemStatusMutex;

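    /* Illustrative sketch (not part of the interface): what the UsedAddressRange
       bookkeeping above buys us. When this allocator replaces the system one,
       free() may receive a pointer that was never allocated here; the bound
       check is a cheap, conservative filter applied before a full ownership
       lookup. passToOriginalFree() is a hypothetical fallback.

           if (!backend->ptrCanBeValid(ptr))      // outside [leftBound, rightBound]
               return passToOriginalFree(ptr);
    */
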
    // Using the maximal observed requested size allows decreasing
    // memory consumption for small requests and decreasing fragmentation
    // for workloads where small and large allocation requests are mixed.
    // TODO: decrease it as well, not only increase it
    std::atomic<size_t> maxRequestedSize;

    // bins registered for regions allocated in advance
    AdvRegionsBins advRegBins;
    // Storage for split FreeBlocks
    IndexedBins freeLargeBlockBins,
                freeSlabAlignedBins;

    // Our friends
    friend class BackendSync;

    /******************************** Backend methods ******************************/

    /*--------------------------- Coalescing functions ----------------------------*/
    void coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned);
    bool coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool reportBlocksProcessed);

    // Main coalescing operation
    FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion);

    // Queue for blocks that conflicted during coalescing
    bool scanCoalescQ(bool forceCoalescQDrop);
    intptr_t blocksInCoalescing() const { return coalescQ.blocksInFly(); }

    /*--------------------- FreeBlock backend accessors ---------------------------*/
    FreeBlock *genericGetBlock(int num, size_t size, bool slabAligned);
    void genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned);

    // Split the block and return the remaining parts to the backend if possible
    FreeBlock *splitBlock(FreeBlock *fBlock, int num, size_t size, bool isAligned, bool needAlignedBlock);

    void removeBlockFromBin(FreeBlock *fBlock);

    // TODO: combine with returnLargeObject
    void putLargeBlock(LargeMemoryBlock *lmb);

    /*------------------- Starting point for OS allocation ------------------------*/
    void requestBootstrapMem();
    FreeBlock *askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt,
                            int *lockedBinsThreshold, int numOfLockedBins,
                            bool *splittable, bool needSlabRegion);

    /*---------------------- Memory regions allocation ----------------------------*/
    FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin);
    void releaseRegion(MemRegion *region);

    // TODO: combine into one initMemoryRegion function
    FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize);
    void startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin);

    /*------------------------- Raw memory accessors ------------------------------*/
    void *allocRawMem(size_t &size);
    bool freeRawMem(void *object, size_t size);

    /*------------------------------ Cleanup functions ----------------------------*/
    // Clean all memory from all caches (extMemPool hard cleanup)
    FreeBlock *releaseMemInCaches(intptr_t startModifiedCnt, int *lockedBinsThreshold, int numOfLockedBins);
    // Soft heap limit (regular cleanup, then maybe hard cleanup)
    void releaseCachesToLimit();

    /*---------------------------------- Utility ----------------------------------*/
    // TODO: move inside the IndexedBins class
    static int sizeToBin(size_t size) {
        if (size >= maxBinned_HugePage)
            return HUGE_BIN;
        else if (size < minBinnedSize)
            return NO_BIN;

        int bin = (size - minBinnedSize)/freeBinsStep;

        MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size.");
        return bin;
    }
    static bool toAlignedBin(FreeBlock *block, size_t size) {
        return isAligned((char*)block + size, slabSize) && size >= slabSize;
    }

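    /* Worked example for sizeToBin(), assuming freeBinsStep is 8 KiB (its value
       comes from LargeObjectCache::LargeBSProps::CacheStep and may differ in
       other configurations):

           sizeToBin(4*1024)      == NO_BIN    // below minBinnedSize, not binned
           sizeToBin(8*1024)      == 0         // (8K  - 8K) / 8K
           sizeToBin(40*1024)     == 4         // (40K - 8K) / 8K
           sizeToBin(4*1024*1024) == HUGE_BIN  // at or above maxBinned_HugePage
    */
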
public:
    /*--------------------- Init, reset, destroy, verify --------------------------*/
    void init(ExtMemoryPool *extMemoryPool);
    bool destroy();

    void verify();
    void reset();
    bool clean(); // clean on caches cleanup

    /*------------------------- Slab block request --------------------------------*/
    BlockI *getSlabBlock(int num);
    void putSlabBlock(BlockI *block);

    /*-------------------------- Large object request -----------------------------*/
    LargeMemoryBlock *getLargeBlock(size_t size);
    // TODO: make consistent with getLargeBlock
    void returnLargeObject(LargeMemoryBlock *lmb);

    /*-------------------------- Backreference memory request ---------------------*/
    void *getBackRefSpace(size_t size, bool *rawMemUsed);
    void putBackRefSpace(void *b, size_t size, bool rawMemUsed);

    /*----------------------------- Remap object ----------------------------------*/
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);

    /*---------------------------- Validation -------------------------------------*/
    bool inUserPool() const;
    bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }

    /*-------------------------- Configuration API --------------------------------*/
    // Soft heap limit
    void setRecommendedMaxSize(size_t softLimit) {
        memSoftLimit = softLimit;
        releaseCachesToLimit();
    }

    /*------------------------------- Info ----------------------------------------*/
    size_t getMaxBinnedSize() const;

    /*-------------------------- Testing, statistics ------------------------------*/
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getTotalMemSize() const { return totalMemSize.load(std::memory_order_relaxed); }
#endif
#if __TBB_MALLOC_BACKEND_STAT
    void reportStat(FILE *f);
private:
    static size_t binToSize(int bin) {
        MALLOC_ASSERT(bin <= HUGE_BIN, "Invalid bin.");

        return bin*freeBinsStep + minBinnedSize;
    }
#endif
};

#endif // __TBB_backend_H