1 /*
2 Copyright (c) 2005-2020 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17
18 #include "tbbmalloc_internal.h"
19 #include <errno.h>
20 #include <new> /* for placement new */
21 #include <string.h> /* for memset */
22
23 #include "../tbb/tbb_version.h"
24 #include "../tbb/tbb_environment.h"
25 #include "../tbb/itt_notify.h" // for __TBB_load_ittnotify()
26
27 #if USE_PTHREAD
28 #define TlsSetValue_func pthread_setspecific
29 #define TlsGetValue_func pthread_getspecific
30 #define GetMyTID() pthread_self()
31 #include <sched.h>
32 inline void do_yield() {sched_yield();}
33 extern "C" { static void mallocThreadShutdownNotification(void*); }
34 #if __sun || __SUNPRO_CC
35 #define __asm__ asm
36 #endif
37 #include <unistd.h> // sysconf(_SC_PAGESIZE)
38 #elif USE_WINTHREAD
39 #define GetMyTID() GetCurrentThreadId()
40 #if __TBB_WIN8UI_SUPPORT
41 #include<thread>
42 #define TlsSetValue_func FlsSetValue
43 #define TlsGetValue_func FlsGetValue
44 #define TlsAlloc() FlsAlloc(NULL)
45 #define TLS_ALLOC_FAILURE FLS_OUT_OF_INDEXES
46 #define TlsFree FlsFree
47 inline void do_yield() {std::this_thread::yield();}
48 #else
49 #define TlsSetValue_func TlsSetValue
50 #define TlsGetValue_func TlsGetValue
51 #define TLS_ALLOC_FAILURE TLS_OUT_OF_INDEXES
52 inline void do_yield() {SwitchToThread();}
53 #endif
54 #else
55 #error Must define USE_PTHREAD or USE_WINTHREAD
56
57 #endif
58
59
60 #define FREELIST_NONBLOCKING 1
61
62 namespace rml {
63 class MemoryPool;
64 namespace internal {
65
66 class Block;
67 class MemoryPool;
68
69 #if MALLOC_CHECK_RECURSION
70
71 inline bool isMallocInitialized();
72
73 bool RecursiveMallocCallProtector::noRecursion() {
74 MALLOC_ASSERT(isMallocInitialized(),
75 "Recursion status can be checked only when initialization was done.");
76 return !mallocRecursionDetected;
77 }
78
79 #endif // MALLOC_CHECK_RECURSION
80
81 /** Support for handling the special UNUSABLE pointer state **/
82 const intptr_t UNUSABLE = 0x1;
83 inline bool isSolidPtr( void* ptr ) {
84 return (UNUSABLE|(intptr_t)ptr)!=UNUSABLE;
85 }
86 inline bool isNotForUse( void* ptr ) {
87 return (intptr_t)ptr==UNUSABLE;
88 }
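// A few concrete values may help here (illustrative only, the addresses are arbitrary):
// isSolidPtr(NULL) and isSolidPtr((void*)UNUSABLE) are both false, since 0x1|0x0 == 0x1
// and 0x1|0x1 == 0x1, while for an ordinary object pointer such as (void*)0x7f0040 the
// result of 0x1|0x7f0040 differs from UNUSABLE, so the pointer is "solid".
// isNotForUse() is true only for the UNUSABLE token itself, not for NULL.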
89
90 /*
91 * Block::objectSize value used to mark blocks allocated by startupAlloc
92 */
93 const uint16_t startupAllocObjSizeMark = ~(uint16_t)0;
94
95 /*
96 * The following constant is used to define the size of struct Block, the block header.
97 * The intent is to have the size of a Block be a multiple of the cache line size, which gives
98 * good alignment at the cost of some overhead equal to the amount of padding included in the Block.
99 */
100 const int blockHeaderAlignment = estimatedCacheLineSize;
101
102 /********* The data structures and global objects **************/
103
104 /*
105 * The malloc routines themselves need to be able to occasionally malloc some space,
106 * in order to set up the structures used by the thread-local structures. This
107 * class provides that function.
108 */
109 class BootStrapBlocks {
110 MallocMutex bootStrapLock;
111 Block *bootStrapBlock;
112 Block *bootStrapBlockUsed;
113 FreeObject *bootStrapObjectList;
114 public:
115 void *allocate(MemoryPool *memPool, size_t size);
116 void free(void* ptr);
117 void reset();
118 };
119
120 #if USE_INTERNAL_TID
121 class ThreadId {
122 static tls_key_t Tid_key;
123 static intptr_t ThreadCount;
124
125 unsigned int id;
126
127 static unsigned int tlsNumber() {
128 unsigned int result = reinterpret_cast<intptr_t>(TlsGetValue_func(Tid_key));
129 if( !result ) {
130 RecursiveMallocCallProtector scoped;
131 // Thread-local value is zero -> first call from this thread,
132 // need to initialize with next ID value (IDs start from 1)
133 result = AtomicIncrement(ThreadCount); // returned new value!
134 TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
135 }
136 return result;
137 }
138 public:
139 static bool init() {
140 #if USE_WINTHREAD
141 Tid_key = TlsAlloc();
142 if (Tid_key == TLS_ALLOC_FAILURE)
143 return false;
144 #else
145 int status = pthread_key_create( &Tid_key, NULL );
146 if ( status ) {
147 fprintf (stderr, "The memory manager cannot create tls key during initialization\n");
148 return false;
149 }
150 #endif /* USE_WINTHREAD */
151 return true;
152 }
153 static void destroy() {
154 if( Tid_key ) {
155 #if USE_WINTHREAD
156 BOOL status = !(TlsFree( Tid_key )); // failure is indicated by zero
157 #else
158 int status = pthread_key_delete( Tid_key );
159 #endif /* USE_WINTHREAD */
160 if ( status )
161 fprintf (stderr, "The memory manager cannot delete tls key\n");
162 Tid_key = 0;
163 }
164 }
165
166 ThreadId() : id(ThreadId::tlsNumber()) {}
167 bool isCurrentThreadId() const { return id == ThreadId::tlsNumber(); }
168
169 #if COLLECT_STATISTICS || MALLOC_TRACE
170 friend unsigned int getThreadId() { return ThreadId::tlsNumber(); }
171 #endif
172 #if COLLECT_STATISTICS
173 static unsigned getMaxThreadId() { return ThreadCount; }
174
175 friend int STAT_increment(ThreadId tid, int bin, int ctr);
176 #endif
177 };
178
179 tls_key_t ThreadId::Tid_key;
180 intptr_t ThreadId::ThreadCount;
181
182 #if COLLECT_STATISTICS
183 int STAT_increment(ThreadId tid, int bin, int ctr)
184 {
185 return ::STAT_increment(tid.id, bin, ctr);
186 }
187 #endif
188
189 #else // USE_INTERNAL_TID
190
191 class ThreadId {
192 #if USE_PTHREAD
193 pthread_t tid;
194 #else
195 DWORD tid;
196 #endif
197 public:
198 ThreadId() : tid(GetMyTID()) {}
199 #if USE_PTHREAD
200 bool isCurrentThreadId() const { return pthread_equal(pthread_self(), tid); }
201 #else
202 bool isCurrentThreadId() const { return GetCurrentThreadId() == tid; }
203 #endif
204 static bool init() { return true; }
205 static void destroy() {}
206 };
207
208 #endif // USE_INTERNAL_TID
209
210 /*********** Code to provide thread ID and a thread-local void pointer **********/
211
212 bool TLSKey::init()
213 {
214 #if USE_WINTHREAD
215 TLS_pointer_key = TlsAlloc();
216 if (TLS_pointer_key == TLS_ALLOC_FAILURE)
217 return false;
218 #else
219 int status = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
220 if ( status )
221 return false;
222 #endif /* USE_WINTHREAD */
223 return true;
224 }
225
226 bool TLSKey::destroy()
227 {
228 #if USE_WINTHREAD
229 BOOL status1 = !(TlsFree(TLS_pointer_key)); // failure is indicated by zero
230 #else
231 int status1 = pthread_key_delete(TLS_pointer_key);
232 #endif /* USE_WINTHREAD */
233 MALLOC_ASSERT(!status1, "The memory manager cannot delete tls key.");
234 return status1==0;
235 }
236
237 inline TLSData* TLSKey::getThreadMallocTLS() const
238 {
239 return (TLSData *)TlsGetValue_func( TLS_pointer_key );
240 }
241
242 inline void TLSKey::setThreadMallocTLS( TLSData * newvalue ) {
243 RecursiveMallocCallProtector scoped;
244 TlsSetValue_func( TLS_pointer_key, newvalue );
245 }
246
247 /* The 'next' field in the block header has to maintain some invariants:
248 * it needs to be on a 16K boundary and be the first field in the block.
249 * Any value stored there needs to have the lower 14 bits set to 0
250 * so that various asserts work. This means that if you want to smash this memory
251 * for debugging purposes you will need to obey this invariant.
252 * The total size of the header needs to be a power of 2 to simplify
253 * the alignment requirements. For now it is a 128 byte structure.
254 * To avoid false sharing, the fields changed only locally are separated
255 * from the fields changed by foreign threads.
256 * Changing the size of the block header would require changing
257 * some bin allocation sizes, in particular the "fitting" sizes (see below).
258 */
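/* Illustration of the invariant above (assuming, as elsewhere in this file, a 16K slab):
 * for any Block *b provided by the backend, (uintptr_t)b & (16*1024 - 1) == 0, and the same
 * holds for every Block* value stored in 'next', so the lower 14 bits stay zero and the
 * alignment asserts can distinguish valid links from smashed memory. */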
259 class Bin;
260 class StartupBlock;
261
262 class MemoryPool {
263 // if no explicit grainsize, expect to see malloc in user's pAlloc
264 // and set reasonable low granularity
265 static const size_t defaultGranularity = estimatedCacheLineSize;
266
267 MemoryPool(); // deny
268 public:
269 static MallocMutex memPoolListLock;
270
271 // list of all active pools is used to release
272 // all TLS data on thread termination or library unload
273 MemoryPool *next,
274 *prev;
275 ExtMemoryPool extMemPool;
276 BootStrapBlocks bootStrapBlocks;
277
278 static void initDefaultPool();
279
280 bool init(intptr_t poolId, const MemPoolPolicy* memPoolPolicy);
281 bool reset();
282 bool destroy();
283 void onThreadShutdown(TLSData *tlsData);
284
285 inline TLSData *getTLS(bool create);
286 void clearTLS() { extMemPool.tlsPointerKey.setThreadMallocTLS(NULL); }
287
288 Block *getEmptyBlock(size_t size);
289 void returnEmptyBlock(Block *block, bool poolTheBlock);
290
291 // get/put large object to/from local large object cache
292 void *getFromLLOCache(TLSData *tls, size_t size, size_t alignment);
293 void putToLLOCache(TLSData *tls, void *object);
294 };
295
296 static intptr_t defaultMemPool_space[sizeof(MemoryPool)/sizeof(intptr_t) +
297 (sizeof(MemoryPool)%sizeof(intptr_t)? 1 : 0)];
298 static MemoryPool *defaultMemPool = (MemoryPool*)defaultMemPool_space;
299 const size_t MemoryPool::defaultGranularity;
300 // zero-initialized
301 MallocMutex MemoryPool::memPoolListLock;
302 // TODO: move huge page status to the default pool, because that's its state
303 HugePagesStatus hugePages;
304 static bool usedBySrcIncluded = false;
305
306 // Padding helpers
307 template<size_t padd>
308 struct PaddingImpl {
309 size_t __padding[padd];
310 };
311
312 template<>
313 struct PaddingImpl<0> {};
314
315 template<int N>
316 struct Padding : PaddingImpl<N/sizeof(size_t)> {};
317
318 // Slab block is 16KB-aligned. To prevent false sharing, separate locally-accessed
319 // fields from fields commonly accessed by non-owner threads.
320 class GlobalBlockFields : public BlockI {
321 protected:
322 FreeObject *publicFreeList;
323 Block *nextPrivatizable;
324 MemoryPool *poolPtr;
325 };
326
327 class LocalBlockFields : public GlobalBlockFields, Padding<blockHeaderAlignment - sizeof(GlobalBlockFields)> {
328 protected:
329 Block *next;
330 Block *previous; /* Use double linked list to speed up removal */
331 FreeObject *bumpPtr; /* Bump pointer moves from the end to the beginning of a block */
332 FreeObject *freeList;
333 /* Pointer to local data for the owner thread. Used to quickly find the TLS
334 when releasing an object from a block that the current thread owns.
335 NULL for orphaned blocks. */
336 TLSData *tlsPtr;
337 ThreadId ownerTid; /* the ID of the thread that owns or last owned the block */
338 BackRefIdx backRefIdx;
339 uint16_t allocatedCount; /* Number of objects allocated (obviously by the owning thread) */
340 uint16_t objectSize;
341 bool isFull;
342
343 friend class FreeBlockPool;
344 friend class StartupBlock;
345 friend class LifoList;
346 friend void *BootStrapBlocks::allocate(MemoryPool *, size_t);
347 friend bool OrphanedBlocks::cleanup(Backend*);
348 friend Block *MemoryPool::getEmptyBlock(size_t);
349 };
350
351 // Use inheritance to guarantee that user data starts on the next cache line.
352 // Can't use a member for it, because when LocalBlockFields already ends on a cache
353 // line boundary, there must be no additional memory consumption with any compiler.
354 class Block : public LocalBlockFields,
355 Padding<2*blockHeaderAlignment - sizeof(LocalBlockFields)> {
356 public:
357 bool empty() const {
358 if (allocatedCount > 0) return false;
359 MALLOC_ASSERT(!isSolidPtr(publicFreeList), ASSERT_TEXT);
360 return true;
361 }
362 inline FreeObject* allocate();
363 inline FreeObject *allocateFromFreeList();
364
365 inline bool adjustFullness();
366 void adjustPositionInBin(Bin* bin = NULL);
367
368 bool freeListNonNull() { return freeList; }
369 void freePublicObject(FreeObject *objectToFree);
370 inline void freeOwnObject(void *object);
371 void reset();
372 void privatizePublicFreeList( bool reset = true );
373 void restoreBumpPtr();
374 void privatizeOrphaned(TLSData *tls, unsigned index);
375 bool readyToShare();
376 void shareOrphaned(intptr_t binTag, unsigned index);
377 unsigned int getSize() const {
378 MALLOC_ASSERT(isStartupAllocObject() || objectSize<minLargeObjectSize,
379 "Invalid object size");
380 return isStartupAllocObject()? 0 : objectSize;
381 }
382 const BackRefIdx *getBackRefIdx() const { return &backRefIdx; }
383 inline bool isOwnedByCurrentThread() const;
384 bool isStartupAllocObject() const { return objectSize == startupAllocObjSizeMark; }
385 inline FreeObject *findObjectToFree(const void *object) const;
386 void checkFreePrecond(const void *object) const {
387 #if MALLOC_DEBUG
388 const char *msg = "Possible double free or heap corruption.";
389 // small objects are always at least sizeof(size_t) bytes aligned;
390 // check this before the dereferences below, as for invalid objects
391 // the memory may be unreadable
392 MALLOC_ASSERT(isAligned(object, sizeof(size_t)), "Try to free invalid small object");
393 // releasing to free slab
394 MALLOC_ASSERT(allocatedCount>0, msg);
395 // must not point to slab's header
396 MALLOC_ASSERT((uintptr_t)object - (uintptr_t)this >= sizeof(Block), msg);
397 if (startupAllocObjSizeMark == objectSize) // startup block
398 MALLOC_ASSERT(object<=bumpPtr, msg);
399 else {
400 // non-startup objects are 8 Byte aligned
401 MALLOC_ASSERT(isAligned(object, 8), "Try to free invalid small object");
402 MALLOC_ASSERT(allocatedCount <= (slabSize-sizeof(Block))/objectSize
403 && (!bumpPtr || object>bumpPtr), msg);
404 FreeObject *toFree = findObjectToFree(object);
405 // check against head of freeList, as this is mostly
406 // expected after double free
407 MALLOC_ASSERT(toFree != freeList, msg);
408 // check against head of publicFreeList, to detect double free
409 // involving foreign thread
410 MALLOC_ASSERT(toFree != publicFreeList, msg);
411 }
412 #else
413 suppress_unused_warning(object);
414 #endif
415 }
416 void initEmptyBlock(TLSData *tls, size_t size);
417 size_t findObjectSize(void *object) const;
418 MemoryPool *getMemPool() const { return poolPtr; } // do not use on the hot path!
419
420 protected:
421 void cleanBlockHeader();
422
423 private:
424 static const float emptyEnoughRatio; /* Threshold on free space needed to "reactivate" a block */
425
426 inline FreeObject *allocateFromBumpPtr();
427 inline FreeObject *findAllocatedObject(const void *address) const;
428 inline bool isProperlyPlaced(const void *object) const;
429 inline void markOwned(TLSData *tls) {
430 MALLOC_ASSERT(!tlsPtr, ASSERT_TEXT);
431 ownerTid = ThreadId(); /* save the ID of the current thread */
432 tlsPtr = tls;
433 }
434 inline void markOrphaned() {
435 MALLOC_ASSERT(tlsPtr, ASSERT_TEXT);
436 tlsPtr = NULL;
437 }
438
439 friend class Bin;
440 friend class TLSData;
441 friend bool MemoryPool::destroy();
442 };
443
444 const float Block::emptyEnoughRatio = 1.0 / 4.0;
445
446 MALLOC_STATIC_ASSERT(sizeof(Block) <= 2*estimatedCacheLineSize,
447 "The class Block does not fit into 2 cache lines on this platform. "
448 "Defining USE_INTERNAL_TID may help to fix it.");
449
450 class Bin {
451 private:
452 Block *activeBlk;
453 Block *mailbox;
454 MallocMutex mailLock;
455
456 public:
457 inline Block* getActiveBlock() const { return activeBlk; }
458 void resetActiveBlock() { activeBlk = NULL; }
459 bool activeBlockUnused() const { return activeBlk && !activeBlk->allocatedCount; }
460 inline void setActiveBlock(Block *block);
461 inline Block* setPreviousBlockActive();
462 Block* getPrivatizedFreeListBlock();
463 void moveBlockToFront(Block *block);
464 bool cleanPublicFreeLists();
465 void processEmptyBlock(Block *block, bool poolTheBlock);
466 void addPublicFreeListBlock(Block* block);
467
468 void outofTLSBin(Block* block);
469 void verifyTLSBin(size_t size) const;
470 void pushTLSBin(Block* block);
471
472 void verifyInitState() const {
473 MALLOC_ASSERT( !activeBlk, ASSERT_TEXT );
474 MALLOC_ASSERT( !mailbox, ASSERT_TEXT );
475 }
476
477 friend void Block::freePublicObject (FreeObject *objectToFree);
478 };
479
480 /********* End of the data structures **************/
481
482 /*
483 * There are bins for all 8-byte aligned object sizes up to maxSmallObjectSize; 8 bins in total
484 */
485 const uint32_t minSmallObjectIndex = 0;
486 const uint32_t numSmallObjectBins = 8;
487 const uint32_t maxSmallObjectSize = 64;
488
489 /*
490 * There are 4 bins between each pair of powers of 2 [64-128-256-...]
491 * from maxSmallObjectSize up to this size; 16 bins in total
492 */
493 const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins;
494 const uint32_t numSegregatedObjectBins = 16;
495 const uint32_t maxSegregatedObjectSize = 1024;
496
497 /*
498 * And there are 5 bins with allocation sizes that are multiples of estimatedCacheLineSize
499 * and selected to fit 9, 6, 4, 3, and 2 allocations in a block.
500 */
501 const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins;
502 const uint32_t numFittingBins = 5;
503
504 const uint32_t fittingAlignment = estimatedCacheLineSize;
505
506 #define SET_FITTING_SIZE(N) ( (slabSize-sizeof(Block))/N ) & ~(fittingAlignment-1)
507 // For slabSize=16*1024, sizeof(Block)=2*estimatedCacheLineSize and fittingAlignment=estimatedCacheLineSize,
508 // the comments show the fitting sizes and the amounts left unused for estimatedCacheLineSize=64/128:
509 const uint32_t fittingSize1 = SET_FITTING_SIZE(9); // 1792/1792 128/000
510 const uint32_t fittingSize2 = SET_FITTING_SIZE(6); // 2688/2688 128/000
511 const uint32_t fittingSize3 = SET_FITTING_SIZE(4); // 4032/3968 128/256
512 const uint32_t fittingSize4 = SET_FITTING_SIZE(3); // 5376/5376 128/000
513 const uint32_t fittingSize5 = SET_FITTING_SIZE(2); // 8128/8064 000/000
514 #undef SET_FITTING_SIZE
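// Worked example for the first line above (assuming slabSize == 16*1024 and
// estimatedCacheLineSize == 64, i.e. sizeof(Block) == 128 and fittingAlignment == 64):
// SET_FITTING_SIZE(9) evaluates (16384 - 128)/9 == 1806 and rounds it down to a multiple
// of 64, giving fittingSize1 == 1792; nine such objects occupy 9*1792 == 16128 bytes,
// leaving 16256 - 16128 == 128 bytes of the slab unused, which matches the "1792 ... 128"
// figures in the comments above.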
515
516 /*
517 * The total number of thread-specific Block-based bins
518 */
519 const uint32_t numBlockBins = minFittingIndex+numFittingBins;
520
521 /*
522 * Objects of this size and larger are considered large objects.
523 */
524 const uint32_t minLargeObjectSize = fittingSize5 + 1;
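// Under the same assumptions (slabSize == 16*1024, estimatedCacheLineSize == 64),
// fittingSize5 == 8128 and therefore minLargeObjectSize == 8129; with a 128-byte
// estimatedCacheLineSize the values are 8064 and 8065 respectively.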
525
526 /*
527 * Per-thread pool of slab blocks. The idea behind it is to not share with other
528 * threads memory that is likely to be in the local cache(s) of our CPU.
529 */
530 class FreeBlockPool {
531 private:
532 Block *head;
533 int size;
534 Backend *backend;
535 bool lastAccessMiss;
536 public:
537 static const int POOL_HIGH_MARK = 32;
538 static const int POOL_LOW_MARK = 8;
539
540 class ResOfGet {
541 ResOfGet();
542 public:
543 Block* block;
544 bool lastAccMiss;
545 ResOfGet(Block *b, bool lastMiss) : block(b), lastAccMiss(lastMiss) {}
546 };
547
548 // allocated in zero-initialized memory
549 FreeBlockPool(Backend *bknd) : backend(bknd) {}
550 ResOfGet getBlock();
551 void returnBlock(Block *block);
552 bool externalCleanup(); // can be called by another thread
553 };
554
555 template<int LOW_MARK, int HIGH_MARK>
556 class LocalLOCImpl {
557 private:
558 static const size_t MAX_TOTAL_SIZE = 4*1024*1024;
559 // TODO: can single-linked list be faster here?
560 LargeMemoryBlock *head,
561 *tail; // needed when releasing on overflow
562 size_t totalSize;
563 int numOfBlocks;
564 public:
565 bool put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool);
566 LargeMemoryBlock *get(size_t size);
567 bool externalCleanup(ExtMemoryPool *extMemPool);
568 #if __TBB_MALLOC_WHITEBOX_TEST
569 LocalLOCImpl() : head(NULL), tail(NULL), totalSize(0), numOfBlocks(0) {}
570 static size_t getMaxSize() { return MAX_TOTAL_SIZE; }
571 static const int LOC_HIGH_MARK = HIGH_MARK;
572 #else
573 // no ctor, object must be created in zero-initialized memory
574 #endif
575 };
576
577 typedef LocalLOCImpl<8,32> LocalLOC; // set production code parameters
578
579 class TLSData : public TLSRemote {
580 MemoryPool *memPool;
581 public:
582 Bin bin[numBlockBinLimit];
583 FreeBlockPool freeSlabBlocks;
584 LocalLOC lloc;
585 unsigned currCacheIdx;
586 private:
587 bool unused;
588 public:
589 TLSData(MemoryPool *mPool, Backend *bknd) : memPool(mPool), freeSlabBlocks(bknd) {}
590 MemoryPool *getMemPool() const { return memPool; }
591 Bin* getAllocationBin(size_t size);
592 void release();
593 bool externalCleanup(bool cleanOnlyUnused, bool cleanBins) {
594 if (!unused && cleanOnlyUnused) return false;
595 // Heavy operation in terms of synchronization complexity,
596 // should be called only for the current thread
597 bool released = cleanBins ? cleanupBlockBins() : false;
598 // both cleanups are to be called, and the order is not important
599 return released | lloc.externalCleanup(&memPool->extMemPool) | freeSlabBlocks.externalCleanup();
600 }
601 bool cleanupBlockBins();
602 void markUsed() { unused = false; } // called by owner when TLS touched
603 void markUnused() { unused = true; } // can be called by a non-owner thread
604 };
605
606 TLSData *TLSKey::createTLS(MemoryPool *memPool, Backend *backend)
607 {
608 MALLOC_ASSERT( sizeof(TLSData) >= sizeof(Bin) * numBlockBins + sizeof(FreeBlockPool), ASSERT_TEXT );
609 TLSData* tls = (TLSData*) memPool->bootStrapBlocks.allocate(memPool, sizeof(TLSData));
610 if ( !tls )
611 return NULL;
612 new(tls) TLSData(memPool, backend);
613 /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
614 #if MALLOC_DEBUG
615 for (uint32_t i = 0; i < numBlockBinLimit; i++)
616 tls->bin[i].verifyInitState();
617 #endif
618 setThreadMallocTLS(tls);
619 memPool->extMemPool.allLocalCaches.registerThread(tls);
620 return tls;
621 }
622
623 bool TLSData::cleanupBlockBins()
624 {
625 bool released = false;
626 for (uint32_t i = 0; i < numBlockBinLimit; i++) {
627 released |= bin[i].cleanPublicFreeLists();
628 // After cleaning public free lists, only the active block might be empty.
629 // Do not use processEmptyBlock because it will just restore bumpPtr.
630 Block *block = bin[i].getActiveBlock();
631 if (block && block->empty()) {
632 bin[i].outofTLSBin(block);
633 memPool->returnEmptyBlock(block, /*poolTheBlock=*/false);
634 released = true;
635 }
636 }
637 return released;
638 }
639
640 bool ExtMemoryPool::releaseAllLocalCaches()
641 {
642 // Iterate all registered TLS data and clean LLOC and Slab pools
643 bool released = allLocalCaches.cleanup(/*cleanOnlyUnused=*/false);
644
645 // Bins privatization is done only for the current thread
646 if (TLSData *tlsData = tlsPointerKey.getThreadMallocTLS())
647 released |= tlsData->cleanupBlockBins();
648
649 return released;
650 }
651
652 void AllLocalCaches::registerThread(TLSRemote *tls)
653 {
654 tls->prev = NULL;
655 MallocMutex::scoped_lock lock(listLock);
656 MALLOC_ASSERT(head!=tls, ASSERT_TEXT);
657 tls->next = head;
658 if (head)
659 head->prev = tls;
660 head = tls;
661 MALLOC_ASSERT(head->next!=head, ASSERT_TEXT);
662 }
663
664 void AllLocalCaches::unregisterThread(TLSRemote *tls)
665 {
666 MallocMutex::scoped_lock lock(listLock);
667 MALLOC_ASSERT(head, "Can't unregister thread: no threads are registered.");
668 if (head == tls)
669 head = tls->next;
670 if (tls->next)
671 tls->next->prev = tls->prev;
672 if (tls->prev)
673 tls->prev->next = tls->next;
674 MALLOC_ASSERT(!tls->next || tls->next->next!=tls->next, ASSERT_TEXT);
675 }
676
677 bool AllLocalCaches::cleanup(bool cleanOnlyUnused)
678 {
679 bool released = false;
680 {
681 MallocMutex::scoped_lock lock(listLock);
682 for (TLSRemote *curr=head; curr; curr=curr->next)
683 released |= static_cast<TLSData*>(curr)->externalCleanup(cleanOnlyUnused, /*cleanBins=*/false);
684 }
685 return released;
686 }
687
688 void AllLocalCaches::markUnused()
689 {
690 bool locked;
691 MallocMutex::scoped_lock lock(listLock, /*block=*/false, &locked);
692 if (!locked) // don't wait to mark if another thread is busy with the list
693 return;
694
695 for (TLSRemote *curr=head; curr; curr=curr->next)
696 static_cast<TLSData*>(curr)->markUnused();
697 }
698
699 #if MALLOC_CHECK_RECURSION
700 MallocMutex RecursiveMallocCallProtector::rmc_mutex;
701 pthread_t RecursiveMallocCallProtector::owner_thread;
702 void *RecursiveMallocCallProtector::autoObjPtr;
703 bool RecursiveMallocCallProtector::mallocRecursionDetected;
704 #if __FreeBSD__
705 bool RecursiveMallocCallProtector::canUsePthread;
706 #endif
707
708 #endif
709
710 /*********** End code to provide thread ID and a TLS pointer **********/
711
712 // Parameter for isLargeObject, keeps our expectations on memory origin.
713 // Assertions must use unknownMem to reliably report object invalidity.
714 enum MemoryOrigin {
715 ourMem, // allocated by TBB allocator
716 unknownMem // can be allocated by system allocator or TBB allocator
717 };
718
719 template<MemoryOrigin> bool isLargeObject(void *object);
720 static void *internalMalloc(size_t size);
721 static void internalFree(void *object);
722 static void *internalPoolMalloc(MemoryPool* mPool, size_t size);
723 static bool internalPoolFree(MemoryPool *mPool, void *object, size_t size);
724
725 #if !MALLOC_DEBUG
726 #if __INTEL_COMPILER || _MSC_VER
727 #define NOINLINE(decl) __declspec(noinline) decl
728 #define ALWAYSINLINE(decl) __forceinline decl
729 #elif __GNUC__
730 #define NOINLINE(decl) decl __attribute__ ((noinline))
731 #define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
732 #else
733 #define NOINLINE(decl) decl
734 #define ALWAYSINLINE(decl) decl
735 #endif
736
737 static NOINLINE( bool doInitialization() );
738 ALWAYSINLINE( bool isMallocInitialized() );
739
740 #undef ALWAYSINLINE
741 #undef NOINLINE
742 #endif /* !MALLOC_DEBUG */
743
744
745 /********* Now some rough utility code to deal with indexing the size bins. **************/
746
747 /*
748 * Given a number, return the position of its highest set bit. It is intended to work with 32-bit values only.
749 * Moreover, on IPF, for the sake of simplicity and performance, it is narrowed to serve only for 64 to 1023.
750 * This is enough for current algorithm of distribution of sizes among bins.
751 * __TBB_Log2 is not used here to minimize dependencies on TBB specific sources.
752 */
753 #if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
754 extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
755 #pragma intrinsic(_BitScanReverse)
756 #endif
757 static inline unsigned int highestBitPos(unsigned int n)
758 {
759 MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT ); // only needed for bsr array lookup, but always true
760 unsigned int pos;
761 #if __ARCH_x86_32||__ARCH_x86_64
762
763 # if __linux__||__APPLE__||__FreeBSD__||__NetBSD__||__OpenBSD__||__sun||__MINGW32__
764 __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
765 # elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
766 __asm
767 {
768 bsr eax, n
769 mov pos, eax
770 }
771 # elif _WIN64 && _MSC_VER>=1400
772 _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
773 # else
774 # error highestBitPos() not implemented for this platform
775 # endif
776 #elif __arm__
777 __asm__ __volatile__
778 (
779 "clz %0, %1\n"
780 "rsb %0, %0, %2\n"
781 :"=r" (pos) :"r" (n), "I" (31)
782 );
783 #else
784 static unsigned int bsr[16] = {0/*N/A*/,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
785 pos = bsr[ n>>6 ];
786 #endif /* __ARCH_* */
787 return pos;
788 }
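// For illustration, within the supported range [64, 1023]: highestBitPos(64) == 6,
// highestBitPos(127) == 6, highestBitPos(128) == 7, and highestBitPos(1023) == 9,
// i.e. the zero-based position of the most significant set bit.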
789
790 template<bool Is32Bit>
791 unsigned int getSmallObjectIndex(unsigned int size)
792 {
793 return (size-1)>>3;
794 }
795 template<>
796 unsigned int getSmallObjectIndex</*Is32Bit=*/false>(unsigned int size)
797 {
798 // For 64-bit malloc, 16 byte alignment is needed except for bin 0.
799 unsigned int result = (size-1)>>3;
800 if (result) result |= 1; // 0,1,3,5,7; bins 2,4,6 are not aligned to 16 bytes
801 return result;
802 }
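// For illustration of the 64-bit variant: sizes 1-8 map to index 0, 9-16 to index 1,
// 17-32 to index 3, 33-48 to index 5, and 49-64 to index 7. Apart from bin 0 (8-byte
// objects), only bins whose object sizes are 16-byte aligned (16, 32, 48, 64) are used;
// bins 2, 4, and 6 (24, 40, 56 bytes) are skipped.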
803 /*
804 * Depending on indexRequest, for a given size return either the index into the bin
805 * for objects of this size, or the actual size of objects in this bin.
806 */
807 template<bool indexRequest>
808 static unsigned int getIndexOrObjectSize (unsigned int size)
809 {
810 if (size <= maxSmallObjectSize) { // selection from 8/16/24/32/40/48/56/64
811 unsigned int index = getSmallObjectIndex</*Is32Bit=*/(sizeof(size_t)<=4)>( size );
812 /* Bin 0 is for 8 bytes, bin 1 is for 16, and so forth */
813 return indexRequest ? index : (index+1)<<3;
814 }
815 else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
816 unsigned int order = highestBitPos(size-1); // which group of bin sizes?
817 MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
818 if (indexRequest)
819 return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
820 else {
821 unsigned int alignment = 128 >> (9-order); // alignment in the group
822 MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
823 return alignUp(size,alignment);
824 }
825 }
826 else {
827 if( size <= fittingSize3 ) {
828 if( size <= fittingSize2 ) {
829 if( size <= fittingSize1 )
830 return indexRequest ? minFittingIndex : fittingSize1;
831 else
832 return indexRequest ? minFittingIndex+1 : fittingSize2;
833 } else
834 return indexRequest ? minFittingIndex+2 : fittingSize3;
835 } else {
836 if( size <= fittingSize5 ) {
837 if( size <= fittingSize4 )
838 return indexRequest ? minFittingIndex+3 : fittingSize4;
839 else
840 return indexRequest ? minFittingIndex+4 : fittingSize5;
841 } else {
842 MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
843 return ~0U;
844 }
845 }
846 }
847 }
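// Worked example (values follow from the constants above): for size == 100, which falls in
// the segregated range, highestBitPos(99) == 6, so the index request yields
// 8 - 24 - 4 + 24 + (99>>4) == 10, and the size request uses alignment 128>>3 == 16 and
// returns alignUp(100,16) == 112; that is, a 100-byte allocation is served from the bin of
// 112-byte objects at index 10.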
848
849 static unsigned int getIndex (unsigned int size)
850 {
851 return getIndexOrObjectSize</*indexRequest=*/true>(size);
852 }
853
854 static unsigned int getObjectSize (unsigned int size)
855 {
856 return getIndexOrObjectSize</*indexRequest=*/false>(size);
857 }
858
859
860 void *BootStrapBlocks::allocate(MemoryPool *memPool, size_t size)
861 {
862 FreeObject *result;
863
864 MALLOC_ASSERT( size == sizeof(TLSData), ASSERT_TEXT );
865
866 { // Lock with acquire
867 MallocMutex::scoped_lock scoped_cs(bootStrapLock);
868
869 if( bootStrapObjectList) {
870 result = bootStrapObjectList;
871 bootStrapObjectList = bootStrapObjectList->next;
872 } else {
873 if (!bootStrapBlock) {
874 bootStrapBlock = memPool->getEmptyBlock(size);
875 if (!bootStrapBlock) return NULL;
876 }
877 result = bootStrapBlock->bumpPtr;
878 bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
879 if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
880 bootStrapBlock->bumpPtr = NULL;
881 bootStrapBlock->next = bootStrapBlockUsed;
882 bootStrapBlockUsed = bootStrapBlock;
883 bootStrapBlock = NULL;
884 }
885 }
886 } // Unlock with release
887
888 memset (result, 0, size);
889 return (void*)result;
890 }
891
892 void BootStrapBlocks::free(void* ptr)
893 {
894 MALLOC_ASSERT( ptr, ASSERT_TEXT );
895 { // Lock with acquire
896 MallocMutex::scoped_lock scoped_cs(bootStrapLock);
897 ((FreeObject*)ptr)->next = bootStrapObjectList;
898 bootStrapObjectList = (FreeObject*)ptr;
899 } // Unlock with release
900 }
901
902 void BootStrapBlocks::reset()
903 {
904 bootStrapBlock = bootStrapBlockUsed = NULL;
905 bootStrapObjectList = NULL;
906 }
907
908 #if !(FREELIST_NONBLOCKING)
909 static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
910 #endif
911
912 /********* End rough utility code **************/
913
914 /* LifoList assumes zero initialization so a vector of it can be created
915 * by just allocating some space with no call to constructor.
916 * On Linux, it seems to be necessary to avoid linking with C++ libraries.
917 *
918 * By usage convention there is no race on the initialization. */
919 LifoList::LifoList( ) : top(NULL)
920 {
921 // MallocMutex assumes zero initialization
922 memset(&lock, 0, sizeof(MallocMutex));
923 }
924
925 void LifoList::push(Block *block)
926 {
927 MallocMutex::scoped_lock scoped_cs(lock);
928 block->next = top;
929 top = block;
930 }
931
932 Block *LifoList::pop()
933 {
934 Block *block=NULL;
935 if (top) {
936 MallocMutex::scoped_lock scoped_cs(lock);
937 if (top) {
938 block = top;
939 top = block->next;
940 }
941 }
942 return block;
943 }
944
945 Block *LifoList::grab()
946 {
947 Block *block = NULL;
948 if (top) {
949 MallocMutex::scoped_lock scoped_cs(lock);
950 block = top;
951 top = NULL;
952 }
953 return block;
954 }
955
956 /********* Thread and block related code *************/
957
958 template<bool poolDestroy> void AllLargeBlocksList::releaseAll(Backend *backend) {
959 LargeMemoryBlock *next, *lmb = loHead;
960 loHead = NULL;
961
962 for (; lmb; lmb = next) {
963 next = lmb->gNext;
964 if (poolDestroy) {
965 // as it's pool destruction, no need to return object to backend,
966 // only remove backrefs, as they are global
967 removeBackRef(lmb->backRefIdx);
968 } else {
969 // clean g(Next|Prev) to prevent removing lmb
970 // from AllLargeBlocksList inside returnLargeObject
971 lmb->gNext = lmb->gPrev = NULL;
972 backend->returnLargeObject(lmb);
973 }
974 }
975 }
976
977 TLSData* MemoryPool::getTLS(bool create)
978 {
979 TLSData* tls = extMemPool.tlsPointerKey.getThreadMallocTLS();
980 if (create && !tls)
981 tls = extMemPool.tlsPointerKey.createTLS(this, &extMemPool.backend);
982 return tls;
983 }
984
985 /*
986 * Return the bin for the given size.
987 */
988 inline Bin* TLSData::getAllocationBin(size_t size)
989 {
990 return bin + getIndex(size);
991 }
992
993 /* Return an empty uninitialized block in a non-blocking fashion. */
994 Block *MemoryPool::getEmptyBlock(size_t size)
995 {
996 TLSData* tls = getTLS(/*create=*/false);
997 // try to use per-thread cache, if TLS available
998 FreeBlockPool::ResOfGet resOfGet = tls?
999 tls->freeSlabBlocks.getBlock() : FreeBlockPool::ResOfGet(NULL, false);
1000 Block *result = resOfGet.block;
1001
1002 if (!result) { // not found in the local cache, ask the backend for slabs
1003 int num = resOfGet.lastAccMiss? Backend::numOfSlabAllocOnMiss : 1;
1004 BackRefIdx backRefIdx[Backend::numOfSlabAllocOnMiss];
1005
1006 result = static_cast<Block*>(extMemPool.backend.getSlabBlock(num));
1007 if (!result) return NULL;
1008
1009 if (!extMemPool.userPool())
1010 for (int i=0; i<num; i++) {
1011 backRefIdx[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
1012 if (backRefIdx[i].isInvalid()) {
1013 // roll back resource allocation
1014 for (int j=0; j<i; j++)
1015 removeBackRef(backRefIdx[j]);
1016 Block *b = result;
1017 for (int j=0; j<num; b=(Block*)((uintptr_t)b+slabSize), j++)
1018 extMemPool.backend.putSlabBlock(b);
1019 return NULL;
1020 }
1021 }
1022 // resources were allocated, register blocks
1023 Block *b = result;
1024 for (int i=0; i<num; b=(Block*)((uintptr_t)b+slabSize), i++) {
1025 // slab block in user's pool must have invalid backRefIdx
1026 if (extMemPool.userPool()) {
1027 new (&b->backRefIdx) BackRefIdx();
1028 } else {
1029 setBackRef(backRefIdx[i], b);
1030 b->backRefIdx = backRefIdx[i];
1031 }
1032 b->tlsPtr = tls;
1033 b->poolPtr = this;
1034 // all but first one go to per-thread pool
1035 if (i > 0) {
1036 MALLOC_ASSERT(tls, ASSERT_TEXT);
1037 tls->freeSlabBlocks.returnBlock(b);
1038 }
1039 }
1040 }
1041 MALLOC_ASSERT(result, ASSERT_TEXT);
1042 result->initEmptyBlock(tls, size);
1043 STAT_increment(getThreadId(), getIndex(result->objectSize), allocBlockNew);
1044 return result;
1045 }
1046
1047 void MemoryPool::returnEmptyBlock(Block *block, bool poolTheBlock)
1048 {
1049 block->reset();
1050 if (poolTheBlock) {
1051 getTLS(/*create=*/false)->freeSlabBlocks.returnBlock(block);
1052 } else {
1053 // slab blocks in user's pools do not have valid backRefIdx
1054 if (!extMemPool.userPool())
1055 removeBackRef(*(block->getBackRefIdx()));
1056 extMemPool.backend.putSlabBlock(block);
1057 }
1058 }
1059
1060 bool ExtMemoryPool::init(intptr_t poolId, rawAllocType rawAlloc,
1061 rawFreeType rawFree, size_t granularity,
1062 bool keepAllMemory, bool fixedPool)
1063 {
1064 this->poolId = poolId;
1065 this->rawAlloc = rawAlloc;
1066 this->rawFree = rawFree;
1067 this->granularity = granularity;
1068 this->keepAllMemory = keepAllMemory;
1069 this->fixedPool = fixedPool;
1070 this->delayRegsReleasing = false;
1071 if (!initTLS())
1072 return false;
1073 loc.init(this);
1074 backend.init(this);
1075 MALLOC_ASSERT(isPoolValid(), NULL);
1076 return true;
1077 }
1078
1079 bool ExtMemoryPool::initTLS() { return tlsPointerKey.init(); }
1080
1081 bool MemoryPool::init(intptr_t poolId, const MemPoolPolicy *policy)
1082 {
1083 if (!extMemPool.init(poolId, policy->pAlloc, policy->pFree,
1084 policy->granularity? policy->granularity : defaultGranularity,
1085 policy->keepAllMemory, policy->fixedPool))
1086 return false;
1087 {
1088 MallocMutex::scoped_lock lock(memPoolListLock);
1089 next = defaultMemPool->next;
1090 defaultMemPool->next = this;
1091 prev = defaultMemPool;
1092 if (next)
1093 next->prev = this;
1094 }
1095 return true;
1096 }
1097
1098 bool MemoryPool::reset()
1099 {
1100 MALLOC_ASSERT(extMemPool.userPool(), "No reset for the system pool.");
1101 // memory is not released during pool reset
1102 // TODO: mark regions to release unused on next reset()
1103 extMemPool.delayRegionsReleasing(true);
1104
1105 bootStrapBlocks.reset();
1106 extMemPool.lmbList.releaseAll</*poolDestroy=*/false>(&extMemPool.backend);
1107 if (!extMemPool.reset())
1108 return false;
1109
1110 if (!extMemPool.initTLS())
1111 return false;
1112 extMemPool.delayRegionsReleasing(false);
1113 return true;
1114 }
1115
1116 bool MemoryPool::destroy()
1117 {
1118 #if __TBB_MALLOC_LOCACHE_STAT
1119 extMemPool.loc.reportStat(stdout);
1120 #endif
1121 #if __TBB_MALLOC_BACKEND_STAT
1122 extMemPool.backend.reportStat(stdout);
1123 #endif
1124 {
1125 MallocMutex::scoped_lock lock(memPoolListLock);
1126 // remove itself from global pool list
1127 if (prev)
1128 prev->next = next;
1129 if (next)
1130 next->prev = prev;
1131 }
1132 // slab blocks in non-default pool do not have backreferences,
1133 // only large objects do
1134 if (extMemPool.userPool())
1135 extMemPool.lmbList.releaseAll</*poolDestroy=*/true>(&extMemPool.backend);
1136 else {
1137 // only one non-userPool() is supported now
1138 MALLOC_ASSERT(this==defaultMemPool, NULL);
1139 // Here and below in extMemPool.destroy(), do not restore the initial state for a
1140 // user pool, because it is just about to be released. But the system pool is
1141 // restored, because we do not want to zero it again on a subsequent reload.
1142 bootStrapBlocks.reset();
1143 extMemPool.orphanedBlocks.reset();
1144 }
1145 return extMemPool.destroy();
1146 }
1147
1148 void MemoryPool::onThreadShutdown(TLSData *tlsData)
1149 {
1150 if (tlsData) { // might be called for "empty" TLS
1151 tlsData->release();
1152 bootStrapBlocks.free(tlsData);
1153 clearTLS();
1154 }
1155 }
1156
1157 #if MALLOC_DEBUG
1158 void Bin::verifyTLSBin (size_t size) const
1159 {
1160 /* The debug version verifies the TLSBin as needed */
1161 uint32_t objSize = getObjectSize(size);
1162
1163 if (activeBlk) {
1164 MALLOC_ASSERT( activeBlk->isOwnedByCurrentThread(), ASSERT_TEXT );
1165 MALLOC_ASSERT( activeBlk->objectSize == objSize, ASSERT_TEXT );
1166 #if MALLOC_DEBUG>1
1167 for (Block* temp = activeBlk->next; temp; temp=temp->next) {
1168 MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
1169 MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
1170 MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
1171 MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
1172 if (temp->next) {
1173 MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
1174 }
1175 }
1176 for (Block* temp = activeBlk->previous; temp; temp=temp->previous) {
1177 MALLOC_ASSERT( temp!=activeBlk, ASSERT_TEXT );
1178 MALLOC_ASSERT( temp->isOwnedByCurrentThread(), ASSERT_TEXT );
1179 MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
1180 MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
1181 if (temp->previous) {
1182 MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
1183 }
1184 }
1185 #endif /* MALLOC_DEBUG>1 */
1186 }
1187 }
1188 #else /* MALLOC_DEBUG */
1189 inline void Bin::verifyTLSBin (size_t) const { }
1190 #endif /* MALLOC_DEBUG */
1191
1192 /*
1193 * Add a block to the start of this tls bin list.
1194 */
1195 void Bin::pushTLSBin(Block* block)
1196 {
1197 /* The objectSize should be defined and not a parameter
1198 because the function is applied to partially filled blocks as well */
1199 unsigned int size = block->objectSize;
1200
1201 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1202 MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
1203 MALLOC_ASSERT( block->next == NULL, ASSERT_TEXT );
1204 MALLOC_ASSERT( block->previous == NULL, ASSERT_TEXT );
1205
1206 MALLOC_ASSERT( this, ASSERT_TEXT );
1207 verifyTLSBin(size);
1208
1209 block->next = activeBlk;
1210 if( activeBlk ) {
1211 block->previous = activeBlk->previous;
1212 activeBlk->previous = block;
1213 if( block->previous )
1214 block->previous->next = block;
1215 } else {
1216 activeBlk = block;
1217 }
1218
1219 verifyTLSBin(size);
1220 }
1221
1222 /*
1223 * Take a block out of its tls bin (e.g. before removal).
1224 */
1225 void Bin::outofTLSBin(Block* block)
1226 {
1227 unsigned int size = block->objectSize;
1228
1229 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1230 MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
1231
1232 MALLOC_ASSERT( this, ASSERT_TEXT );
1233 verifyTLSBin(size);
1234
1235 if (block == activeBlk) {
1236 activeBlk = block->previous? block->previous : block->next;
1237 }
1238 /* Unlink the block */
1239 if (block->previous) {
1240 MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
1241 block->previous->next = block->next;
1242 }
1243 if (block->next) {
1244 MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
1245 block->next->previous = block->previous;
1246 }
1247 block->next = NULL;
1248 block->previous = NULL;
1249
1250 verifyTLSBin(size);
1251 }
1252
1253 Block* Bin::getPrivatizedFreeListBlock()
1254 {
1255 Block* block;
1256 MALLOC_ASSERT( this, ASSERT_TEXT );
1257 // if this method is called, active block usage must be unsuccessful
1258 MALLOC_ASSERT( !activeBlk && !mailbox || activeBlk && activeBlk->isFull, ASSERT_TEXT );
1259
1260 // the counter should be changed STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
1261 if (!FencedLoad((intptr_t&)mailbox)) // hotpath is empty mailbox
1262 return NULL;
1263 else { // mailbox is not empty, take lock and inspect it
1264 MallocMutex::scoped_lock scoped_cs(mailLock);
1265 block = mailbox;
1266 if( block ) {
1267 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1268 MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable), ASSERT_TEXT );
1269 mailbox = block->nextPrivatizable;
1270 block->nextPrivatizable = (Block*) this;
1271 }
1272 }
1273 if( block ) {
1274 MALLOC_ASSERT( isSolidPtr(block->publicFreeList), ASSERT_TEXT );
1275 block->privatizePublicFreeList();
1276 block->adjustPositionInBin(this);
1277 }
1278 return block;
1279 }
1280
1281 void Bin::addPublicFreeListBlock(Block* block)
1282 {
1283 MallocMutex::scoped_lock scoped_cs(mailLock);
1284 block->nextPrivatizable = mailbox;
1285 mailbox = block;
1286 }
1287
1288 // Process publicly freed objects in all blocks and return empty blocks
1289 // to the backend in order to reduce overall footprint.
1290 bool Bin::cleanPublicFreeLists()
1291 {
1292 Block* block;
1293 if (!FencedLoad((intptr_t&)mailbox))
1294 return false;
1295 else {
1296 // Grab all the blocks in the mailbox
1297 MallocMutex::scoped_lock scoped_cs(mailLock);
1298 block = mailbox;
1299 mailbox = NULL;
1300 }
1301 bool released = false;
1302 while (block) {
1303 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1304 Block* tmp = block->nextPrivatizable;
1305 block->nextPrivatizable = (Block*) this;
1306 block->privatizePublicFreeList();
1307 if (block->empty()) {
1308 processEmptyBlock(block, /*poolTheBlock=*/false);
1309 released = true;
1310 } else
1311 block->adjustPositionInBin(this);
1312 block = tmp;
1313 }
1314 return released;
1315 }
1316
1317 bool Block::adjustFullness()
1318 {
1319 if (bumpPtr) {
1320 /* If we are still using a bump ptr for this block it is empty enough to use. */
1321 STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
1322 isFull = false;
1323 } else {
1324 const float threshold = (slabSize - sizeof(Block)) * (1 - emptyEnoughRatio);
1325 /* allocatedCount shows how many objects in the block are in use; however, it still counts
1326 * objects freed by other threads; so a prior call to privatizePublicFreeList() is recommended */
1327 isFull = (allocatedCount*objectSize > threshold) ? true : false;
1328 #if COLLECT_STATISTICS
1329 if (isFull)
1330 STAT_increment(getThreadId(), getIndex(objectSize), examineNotEmpty);
1331 else
1332 STAT_increment(getThreadId(), getIndex(objectSize), examineEmptyEnough);
1333 #endif
1334 }
1335 return isFull;
1336 }
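// For illustration (assuming slabSize == 16*1024 and sizeof(Block) == 128): the threshold is
// (16384 - 128) * (1 - 1/4) == 12192 bytes, so a block of 128-byte objects is considered
// full while at least 96 of its at most 127 objects remain allocated.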
1337
1338 // This method resides in class Block, and not in class Bin, in order to avoid
1339 // calling getAllocationBin on a reasonably hot path in Block::freeOwnObject
1340 void Block::adjustPositionInBin(Bin* bin/*=NULL*/)
1341 {
1342 // If the block was full but has become empty enough to use,
1343 // move it to the front of the list
1344 if (isFull && !adjustFullness()) {
1345 if (!bin)
1346 bin = tlsPtr->getAllocationBin(objectSize);
1347 bin->moveBlockToFront(this);
1348 }
1349 }
1350
1351 /* Restore the bump pointer for an empty block that is planned for use */
1352 void Block::restoreBumpPtr()
1353 {
1354 MALLOC_ASSERT( allocatedCount == 0, ASSERT_TEXT );
1355 MALLOC_ASSERT( !isSolidPtr(publicFreeList), ASSERT_TEXT );
1356 STAT_increment(getThreadId(), getIndex(objectSize), freeRestoreBumpPtr);
1357 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
1358 freeList = NULL;
1359 isFull = false;
1360 }
1361
1362 void Block::freeOwnObject(void *object)
1363 {
1364 tlsPtr->markUsed();
1365 allocatedCount--;
1366 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
1367 #if COLLECT_STATISTICS
1368 // Note that getAllocationBin is not called on the hottest path with statistics off.
1369 if (tlsPtr->getAllocationBin(objectSize)->getActiveBlock() != this)
1370 STAT_increment(getThreadId(), getIndex(objectSize), freeToInactiveBlock);
1371 else
1372 STAT_increment(getThreadId(), getIndex(objectSize), freeToActiveBlock);
1373 #endif
1374 if (empty()) {
1375 // If the last object of a slab is freed, the slab cannot be marked full
1376 MALLOC_ASSERT(!isFull, ASSERT_TEXT);
1377 tlsPtr->getAllocationBin(objectSize)->processEmptyBlock(this, /*poolTheBlock=*/true);
1378 } else { // hot path
1379 FreeObject *objectToFree = findObjectToFree(object);
1380 objectToFree->next = freeList;
1381 freeList = objectToFree;
1382 adjustPositionInBin();
1383 }
1384 }
1385
1386 void Block::freePublicObject (FreeObject *objectToFree)
1387 {
1388 FreeObject *localPublicFreeList;
1389
1390 MALLOC_ITT_SYNC_RELEASING(&publicFreeList);
1391 #if FREELIST_NONBLOCKING
1392 FreeObject *temp = publicFreeList;
1393 do {
1394 localPublicFreeList = objectToFree->next = temp;
1395 temp = (FreeObject*)AtomicCompareExchange(
1396 (intptr_t&)publicFreeList,
1397 (intptr_t)objectToFree, (intptr_t)localPublicFreeList );
1398 // no backoff necessary because trying to make change, not waiting for a change
1399 } while( temp != localPublicFreeList );
1400 #else
1401 STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
1402 {
1403 MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
1404 localPublicFreeList = objectToFree->next = publicFreeList;
1405 publicFreeList = objectToFree;
1406 }
1407 #endif
1408
1409 if( localPublicFreeList==NULL ) {
1410 // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE
1411 // otherwise, it should point to the bin the block belongs to.
1412 // reading nextPrivatizable is thread-safe below, because:
1413 // 1) the executing thread atomically got publicFreeList==NULL and changed it to non-NULL;
1414 // 2) only owning thread can change it back to NULL,
1415 // 3) but it can not be done until the block is put to the mailbox
1416 // So the executing thread is now the only one that can change nextPrivatizable
1417 if( !isNotForUse(nextPrivatizable) ) {
1418 MALLOC_ASSERT( nextPrivatizable!=NULL, ASSERT_TEXT );
1419 Bin* theBin = (Bin*) nextPrivatizable;
1420 theBin->addPublicFreeListBlock(this);
1421 }
1422 }
1423 STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
1424 STAT_increment(ownerTid, getIndex(objectSize), freeByOtherThread);
1425 }
1426
1427 // Make objects freed by other threads available for use again
1428 void Block::privatizePublicFreeList( bool reset )
1429 {
1430 FreeObject *localPublicFreeList;
1431 // If reset is false, publicFreeList should not be zeroed but set to UNUSABLE
1432 // to properly synchronize with other threads freeing objects to this slab.
1433 const intptr_t endMarker = reset ? 0 : UNUSABLE;
1434
1435 // Only the owner thread may reset the pointer to NULL
1436 MALLOC_ASSERT( isOwnedByCurrentThread() || !reset, ASSERT_TEXT );
1437 #if FREELIST_NONBLOCKING
1438 localPublicFreeList = (FreeObject*)AtomicFetchStore( &publicFreeList, endMarker );
1439 #else
1440 STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
1441 {
1442 MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
1443 localPublicFreeList = publicFreeList;
1444 publicFreeList = endMarker;
1445 }
1446 #endif
1447 MALLOC_ITT_SYNC_ACQUIRED(&publicFreeList);
1448 MALLOC_ASSERT( !(reset && isNotForUse(publicFreeList)), ASSERT_TEXT );
1449
1450 // publicFreeList must have been UNUSABLE or valid, but not NULL
1451 MALLOC_ASSERT( localPublicFreeList!=NULL, ASSERT_TEXT );
1452 if( isSolidPtr(localPublicFreeList) ) {
1453 MALLOC_ASSERT( allocatedCount <= (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
1454 /* other threads did not change the counter freeing our blocks */
1455 allocatedCount--;
1456 FreeObject *temp = localPublicFreeList;
1457 while( isSolidPtr(temp->next) ){ // the list will end with either NULL or UNUSABLE
1458 temp = temp->next;
1459 allocatedCount--;
1460 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
1461 }
1462 /* merge with local freeList */
1463 temp->next = freeList;
1464 freeList = localPublicFreeList;
1465 STAT_increment(getThreadId(), getIndex(objectSize), allocPrivatized);
1466 }
1467 }
1468
1469 void Block::privatizeOrphaned(TLSData *tls, unsigned index)
1470 {
1471 Bin* bin = tls->bin + index;
1472 STAT_increment(getThreadId(), index, allocBlockPublic);
1473 next = NULL;
1474 previous = NULL;
1475 MALLOC_ASSERT( publicFreeList!=NULL, ASSERT_TEXT );
1476 /* There is not a race here since no other thread owns this block */
1477 markOwned(tls);
1478 // It is safe to change nextPrivatizable, as publicFreeList is not null
1479 MALLOC_ASSERT( isNotForUse(nextPrivatizable), ASSERT_TEXT );
1480 nextPrivatizable = (Block*)bin;
1481 // the next call is required to change publicFreeList to 0
1482 privatizePublicFreeList();
1483 if( empty() ) {
1484 restoreBumpPtr();
1485 } else {
1486 adjustFullness(); // check the block fullness and set isFull
1487 }
1488 MALLOC_ASSERT( !isNotForUse(publicFreeList), ASSERT_TEXT );
1489 }
1490
1491
1492 bool Block::readyToShare()
1493 {
1494 void* oldval;
1495 #if FREELIST_NONBLOCKING
1496 oldval = (void*)AtomicCompareExchange((intptr_t&)publicFreeList, UNUSABLE, 0);
1497 #else
1498 STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
1499 {
1500 MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
1501 if ( (oldval=publicFreeList)==NULL )
1502 (intptr_t&)(publicFreeList) = UNUSABLE;
1503 }
1504 #endif
1505 return oldval==NULL;
1506 }
1507
1508 void Block::shareOrphaned(intptr_t binTag, unsigned index)
1509 {
1510 MALLOC_ASSERT( binTag, ASSERT_TEXT );
1511 STAT_increment(getThreadId(), index, freeBlockPublic);
1512 markOrphaned();
1513 if ((intptr_t)nextPrivatizable==binTag) {
1514 // First check passed: the block is not in mailbox yet.
1515 // Need to set publicFreeList to non-zero, so other threads
1516 // will not change nextPrivatizable and it can be zeroed.
1517 if ( !readyToShare() ) {
1518 // another thread freed an object; we need to wait until it finishes.
1519 // There is no need for exponential backoff, as the wait here is not for a lock;
1520 // but we need to yield, so the thread we are waiting for has a chance to run.
1521 // TODO: add a pause to also be friendly to hyperthreads
1522 int count = 256;
1523 while( (intptr_t)const_cast<Block* volatile &>(nextPrivatizable)==binTag ) {
1524 if (--count==0) {
1525 do_yield();
1526 count = 256;
1527 }
1528 }
1529 }
1530 }
1531 MALLOC_ASSERT( publicFreeList!=NULL, ASSERT_TEXT );
1532 // now it is safe to change our data
1533 previous = NULL;
1534 // it is the caller's responsibility to ensure that the list of blocks
1535 // formed by nextPrivatizable pointers is kept consistent if required.
1536 // If it is only called from thread shutdown code, it does not matter.
1537 (intptr_t&)(nextPrivatizable) = UNUSABLE;
1538 }
1539
1540 void Block::cleanBlockHeader()
1541 {
1542 next = NULL;
1543 previous = NULL;
1544 freeList = NULL;
1545 allocatedCount = 0;
1546 isFull = false;
1547 tlsPtr = NULL;
1548
1549 publicFreeList = NULL;
1550 }
1551
1552 void Block::initEmptyBlock(TLSData *tls, size_t size)
1553 {
1554 // Having getIndex and getObjectSize called next to each other
1555 // allows better compiler optimization as they basically share the code.
1556 unsigned int index = getIndex(size);
1557 unsigned int objSz = getObjectSize(size);
1558
1559 cleanBlockHeader();
1560 objectSize = objSz;
1561 markOwned(tls);
1562 // bump pointer should be prepared for the first allocation - thus move it down by objectSize
1563 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize - objectSize);
1564
1565 // each block should have the address where the head of the list of "privatizable" blocks is kept
1566 // the only exception is a bootstrap block, which is initialized while TLS is still NULL
1567 nextPrivatizable = tls? (Block*)(tls->bin + index) : NULL;
1568 TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %ld, objectSize is %d, bumpPtr is %p\n",
1569 this, tlsPtr ? getThreadId() : -1, objectSize, bumpPtr ));
1570 }
1571
1572 Block *OrphanedBlocks::get(TLSData *tls, unsigned int size)
1573 {
1574 // TODO: try to use index from getAllocationBin
1575 unsigned int index = getIndex(size);
1576 Block *block = bins[index].pop();
1577 if (block) {
1578 MALLOC_ITT_SYNC_ACQUIRED(bins+index);
1579 block->privatizeOrphaned(tls, index);
1580 }
1581 return block;
1582 }
1583
put(intptr_t binTag,Block * block)1584 void OrphanedBlocks::put(intptr_t binTag, Block *block)
1585 {
1586 unsigned int index = getIndex(block->getSize());
1587 block->shareOrphaned(binTag, index);
1588 MALLOC_ITT_SYNC_RELEASING(bins+index);
1589 bins[index].push(block);
1590 }
1591
reset()1592 void OrphanedBlocks::reset()
1593 {
1594 for (uint32_t i=0; i<numBlockBinLimit; i++)
1595 new (bins+i) LifoList();
1596 }
1597
cleanup(Backend * backend)1598 bool OrphanedBlocks::cleanup(Backend* backend)
1599 {
1600 bool released = false;
1601 for (uint32_t i=0; i<numBlockBinLimit; i++) {
1602 Block* block = bins[i].grab();
1603 MALLOC_ITT_SYNC_ACQUIRED(bins+i);
1604 while (block) {
1605 Block* next = block->next;
1606 block->privatizePublicFreeList( /*reset=*/false ); // do not set publicFreeList to NULL
1607 if (block->empty()) {
1608 block->reset();
1609 // slab blocks in user's pools do not have valid backRefIdx
1610 if (!backend->inUserPool())
1611 removeBackRef(*(block->getBackRefIdx()));
1612 backend->putSlabBlock(block);
1613 released = true;
1614 } else {
1615 MALLOC_ITT_SYNC_RELEASING(bins+i);
1616 bins[i].push(block);
1617 }
1618 block = next;
1619 }
1620 }
1621 return released;
1622 }
1623
getBlock()1624 FreeBlockPool::ResOfGet FreeBlockPool::getBlock()
1625 {
1626 Block *b = (Block*)AtomicFetchStore(&head, 0);
1627
1628 if (b) {
1629 size--;
1630 Block *newHead = b->next;
1631 lastAccessMiss = false;
1632 FencedStore((intptr_t&)head, (intptr_t)newHead);
1633 } else
1634 lastAccessMiss = true;
1635
1636 return ResOfGet(b, lastAccessMiss);
1637 }
1638
returnBlock(Block * block)1639 void FreeBlockPool::returnBlock(Block *block)
1640 {
1641 MALLOC_ASSERT( size <= POOL_HIGH_MARK, ASSERT_TEXT );
1642 Block *localHead = (Block*)AtomicFetchStore(&head, 0);
1643
1644 if (!localHead)
1645         size = 0; // head was stolen by externalCleanup, correct size accordingly
1646 else if (size == POOL_HIGH_MARK) {
1647 // release cold blocks and add hot one,
1648 // so keep POOL_LOW_MARK-1 blocks and add new block to head
1649 Block *headToFree = localHead, *helper;
1650 for (int i=0; i<POOL_LOW_MARK-2; i++)
1651 headToFree = headToFree->next;
1652 Block *last = headToFree;
1653 headToFree = headToFree->next;
1654 last->next = NULL;
1655 size = POOL_LOW_MARK-1;
1656 for (Block *currBl = headToFree; currBl; currBl = helper) {
1657 helper = currBl->next;
1658 // slab blocks in user's pools do not have valid backRefIdx
1659 if (!backend->inUserPool())
1660 removeBackRef(currBl->backRefIdx);
1661 backend->putSlabBlock(currBl);
1662 }
1663 }
1664 size++;
1665 block->next = localHead;
1666 FencedStore((intptr_t&)head, (intptr_t)block);
1667 }
1668
externalCleanup()1669 bool FreeBlockPool::externalCleanup()
1670 {
1671 Block *helper;
1672 bool released = false;
1673
1674 for (Block *currBl=(Block*)AtomicFetchStore(&head, 0); currBl; currBl=helper) {
1675 helper = currBl->next;
1676 // slab blocks in user's pools do not have valid backRefIdx
1677 if (!backend->inUserPool())
1678 removeBackRef(currBl->backRefIdx);
1679 backend->putSlabBlock(currBl);
1680 released = true;
1681 }
1682 return released;
1683 }
1684
1685 /* Prepare the block for returning to FreeBlockPool */
reset()1686 void Block::reset()
1687 {
1688     // it is the caller's responsibility to ensure no data is lost before calling this
1689 MALLOC_ASSERT( allocatedCount==0, ASSERT_TEXT );
1690 MALLOC_ASSERT( !isSolidPtr(publicFreeList), ASSERT_TEXT );
1691 if (!isStartupAllocObject())
1692 STAT_increment(getThreadId(), getIndex(objectSize), freeBlockBack);
1693
1694 cleanBlockHeader();
1695
1696 nextPrivatizable = NULL;
1697
1698 objectSize = 0;
1699 // for an empty block, bump pointer should point right after the end of the block
1700 bumpPtr = (FreeObject *)((uintptr_t)this + slabSize);
1701 }
1702
setActiveBlock(Block * block)1703 inline void Bin::setActiveBlock (Block *block)
1704 {
1705 // MALLOC_ASSERT( bin, ASSERT_TEXT );
1706 MALLOC_ASSERT( block->isOwnedByCurrentThread(), ASSERT_TEXT );
1707     // it is the caller's responsibility to keep the bin consistent (i.e. ensure this block is in the bin list)
1708 activeBlk = block;
1709 }
1710
setPreviousBlockActive()1711 inline Block* Bin::setPreviousBlockActive()
1712 {
1713 MALLOC_ASSERT( activeBlk, ASSERT_TEXT );
1714 Block* temp = activeBlk->previous;
1715 if( temp ) {
1716 MALLOC_ASSERT( !(temp->isFull), ASSERT_TEXT );
1717 activeBlk = temp;
1718 }
1719 return temp;
1720 }
1721
isOwnedByCurrentThread() const1722 inline bool Block::isOwnedByCurrentThread() const {
1723 return tlsPtr && ownerTid.isCurrentThreadId();
1724 }
1725
findObjectToFree(const void * object) const1726 FreeObject *Block::findObjectToFree(const void *object) const
1727 {
1728 FreeObject *objectToFree;
1729 // Due to aligned allocations, a pointer passed to scalable_free
1730 // might differ from the address of internally allocated object.
1731 // Small objects however should always be fine.
1732 if (objectSize <= maxSegregatedObjectSize)
1733 objectToFree = (FreeObject*)object;
1734 // "Fitting size" allocations are suspicious if aligned higher than naturally
1735 else {
1736 if ( ! isAligned(object,2*fittingAlignment) )
1737             // TODO: the above check is questionable - it gives false negatives in ~50% of cases,
1738             //       so it might even be slower on average than unconditional use of findAllocatedObject.
1739 // here it should be a "real" object
1740 objectToFree = (FreeObject*)object;
1741 else
1742 // here object can be an aligned address, so applying additional checks
1743 objectToFree = findAllocatedObject(object);
1744 MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT );
1745 }
1746 MALLOC_ASSERT( isProperlyPlaced(objectToFree), ASSERT_TEXT );
1747
1748 return objectToFree;
1749 }
1750
release()1751 void TLSData::release()
1752 {
1753 memPool->extMemPool.allLocalCaches.unregisterThread(this);
1754 externalCleanup(/*cleanOnlyUnused=*/false, /*cleanBins=*/false);
1755
1756 for (unsigned index = 0; index < numBlockBins; index++) {
1757 Block *activeBlk = bin[index].getActiveBlock();
1758 if (!activeBlk)
1759 continue;
1760 Block *threadlessBlock = activeBlk->previous;
1761 while (threadlessBlock) {
1762 Block *threadBlock = threadlessBlock->previous;
1763 if (threadlessBlock->empty()) {
1764                 /* the thread is being destroyed, so do not use its block pool */
1765 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
1766 } else {
1767 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
1768 }
1769 threadlessBlock = threadBlock;
1770 }
1771 threadlessBlock = activeBlk;
1772 while (threadlessBlock) {
1773 Block *threadBlock = threadlessBlock->next;
1774 if (threadlessBlock->empty()) {
1775                 /* the thread is being destroyed, so do not use its block pool */
1776 memPool->returnEmptyBlock(threadlessBlock, /*poolTheBlock=*/false);
1777 } else {
1778 memPool->extMemPool.orphanedBlocks.put(intptr_t(bin+index), threadlessBlock);
1779 }
1780 threadlessBlock = threadBlock;
1781 }
1782 bin[index].resetActiveBlock();
1783 }
1784 }
1785
1786
1787 #if MALLOC_CHECK_RECURSION
1788 // TODO: Use dedicated heap for this
1789
1790 /*
1791  * This is a special kind of allocation that can be used when malloc is
1792  * not available (either during startup or when malloc has already been called and
1793  * we are, say, inside a pthread_setspecific call).
1794  * A block can contain objects of different sizes;
1795  * allocations are performed by moving the bump pointer and incrementing an object counter,
1796  * and releasing is done via the counter of objects allocated in the block,
1797  * or by moving the bump pointer back if the released object is at the boundary.
1798  * TODO: make the bump pointer grow in the same backward direction as all the others.
1799 */
1800
1801 class StartupBlock : public Block {
availableSize() const1802 size_t availableSize() const {
1803 return slabSize - ((uintptr_t)bumpPtr - (uintptr_t)this);
1804 }
1805 static StartupBlock *getBlock();
1806 public:
1807 static FreeObject *allocate(size_t size);
msize(void * ptr)1808 static size_t msize(void *ptr) { return *((size_t*)ptr - 1); }
1809 void free(void *ptr);
1810 };
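
/*
 * Illustrative sketch (not part of the allocator): how a startup allocation is laid out
 * and how msize() recovers the size. The requested size is kept in a size_t word just
 * below the returned pointer, and the bump pointer grows forward inside the block:
 *
 *     void  *p = StartupBlock::allocate(24);   // reserves sizeof(size_t) + 24 bytes
 *     size_t s = StartupBlock::msize(p);       // reads *((size_t*)p - 1) == 24
 *
 * The block is released back to the backend only when its allocatedCount drops to zero.
 */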
1811
1812 static MallocMutex startupMallocLock;
1813 static StartupBlock *firstStartupBlock;
1814
getBlock()1815 StartupBlock *StartupBlock::getBlock()
1816 {
1817 BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/false);
1818 if (backRefIdx.isInvalid()) return NULL;
1819
1820 StartupBlock *block = static_cast<StartupBlock*>(
1821 defaultMemPool->extMemPool.backend.getSlabBlock(1));
1822 if (!block) return NULL;
1823
1824 block->cleanBlockHeader();
1825 setBackRef(backRefIdx, block);
1826 block->backRefIdx = backRefIdx;
1827     // use startupAllocObjSizeMark to mark objects allocated from startup blocks
1828 block->objectSize = startupAllocObjSizeMark;
1829 block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock));
1830 return block;
1831 }
1832
allocate(size_t size)1833 FreeObject *StartupBlock::allocate(size_t size)
1834 {
1835 FreeObject *result;
1836 StartupBlock *newBlock = NULL;
1837 bool newBlockUnused = false;
1838
1839     /* Objects must be aligned on their natural boundaries,
1840        and objects bigger than a word on the word boundary. */
1841 size = alignUp(size, sizeof(size_t));
1842 // We need size of an object to implement msize.
1843 size_t reqSize = size + sizeof(size_t);
1844     // speculatively allocate newBlock to try to avoid allocation while holding the lock
1845     /* TODO: The function is called when a nested malloc call is detected,
1846              so simultaneous usage from different threads seems unlikely.
1847              If pre-allocation is found useless, the code might be simplified. */
1848 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) {
1849 newBlock = StartupBlock::getBlock();
1850 if (!newBlock) return NULL;
1851 }
1852 {
1853 MallocMutex::scoped_lock scoped_cs(startupMallocLock);
1854 // Re-check whether we need a new block (conditions might have changed)
1855 if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) {
1856 if (!newBlock) {
1857 newBlock = StartupBlock::getBlock();
1858 if (!newBlock) return NULL;
1859 }
1860 newBlock->next = (Block*)firstStartupBlock;
1861 if (firstStartupBlock)
1862 firstStartupBlock->previous = (Block*)newBlock;
1863 firstStartupBlock = newBlock;
1864 } else
1865 newBlockUnused = true;
1866 result = firstStartupBlock->bumpPtr;
1867 firstStartupBlock->allocatedCount++;
1868 firstStartupBlock->bumpPtr =
1869 (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize);
1870 }
1871 if (newBlock && newBlockUnused)
1872 defaultMemPool->returnEmptyBlock(newBlock, /*poolTheBlock=*/false);
1873
1874 // keep object size at the negative offset
1875 *((size_t*)result) = size;
1876 return (FreeObject*)((size_t*)result+1);
1877 }
1878
free(void * ptr)1879 void StartupBlock::free(void *ptr)
1880 {
1881 Block* blockToRelease = NULL;
1882 {
1883 MallocMutex::scoped_lock scoped_cs(startupMallocLock);
1884
1885 MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT);
1886 MALLOC_ASSERT(startupAllocObjSizeMark==objectSize
1887 && allocatedCount>0, ASSERT_TEXT);
1888 MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)this+sizeof(StartupBlock)
1889 && (uintptr_t)ptr+StartupBlock::msize(ptr)<=(uintptr_t)this+slabSize,
1890 ASSERT_TEXT);
1891 if (0 == --allocatedCount) {
1892 if (this == firstStartupBlock)
1893 firstStartupBlock = (StartupBlock*)firstStartupBlock->next;
1894 if (previous)
1895 previous->next = next;
1896 if (next)
1897 next->previous = previous;
1898 blockToRelease = this;
1899 } else if ((uintptr_t)ptr + StartupBlock::msize(ptr) == (uintptr_t)bumpPtr) {
1900 // last object in the block released
1901 FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1);
1902 MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)this+sizeof(StartupBlock),
1903 ASSERT_TEXT);
1904 bumpPtr = newBump;
1905 }
1906 }
1907 if (blockToRelease) {
1908 blockToRelease->previous = blockToRelease->next = NULL;
1909 defaultMemPool->returnEmptyBlock(blockToRelease, /*poolTheBlock=*/false);
1910 }
1911 }
1912
1913 #endif /* MALLOC_CHECK_RECURSION */
1914
1915 /********* End thread related code *************/
1916
1917 /********* Library initialization *************/
1918
1919 //! Value indicating the state of initialization.
1920 /* 0 = initialization not started.
1921 * 1 = initialization started but not finished.
1922 * 2 = initialization finished.
1923 * In theory, we only need values 0 and 2. But value 1 is nonetheless
1924 * useful for detecting errors in the double-check pattern.
1925 */
1926 static intptr_t mallocInitialized; // implicitly initialized to 0
1927 static MallocMutex initMutex;
1928
1929 /** The leading "\0" is here so that applying "strings" to the binary
1930 delivers a clean result. */
1931 static char VersionString[] = "\0" TBBMALLOC_VERSION_STRINGS;
1932
1933 #if USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND)
1934
1935 /* Decrease the race window between dynamic library unloading and the pthread key
1936    destructor. Protect only pthreads configurations where unloading is supported. */
1937 class ShutdownSync {
1938 /* flag is the number of threads in pthread key dtor body
1939 (i.e., between threadDtorStart() and threadDtorDone())
1940 or the signal to skip dtor, if flag < 0 */
1941 intptr_t flag;
1942 static const intptr_t skipDtor = INTPTR_MIN/2;
1943 public:
init()1944 void init() { flag = 0; }
1945     /* We assume that 2*abs(skipDtor) or more threads never call threadDtorStart()
1946        simultaneously, so flag never becomes negative for that reason. */
threadDtorStart()1947 bool threadDtorStart() {
1948 if (flag < 0)
1949 return false;
1950         if (AtomicIncrement(flag) <= 0) { // note that the new value is returned
1951             AtomicAdd(flag, -1);  // we have spoiled flag, restore it
1952 return false;
1953 }
1954 return true;
1955 }
threadDtorDone()1956 void threadDtorDone() {
1957 AtomicAdd(flag, -1);
1958 }
processExit()1959 void processExit() {
1960 if (AtomicAdd(flag, skipDtor) != 0)
1961 SpinWaitUntilEq(flag, skipDtor);
1962 }
1963 };
1964
1965 #else
1966
1967 class ShutdownSync {
1968 public:
init()1969 void init() { }
threadDtorStart()1970 bool threadDtorStart() { return true; }
threadDtorDone()1971 void threadDtorDone() { }
processExit()1972 void processExit() { }
1973 };
1974
1975 #endif // USE_PTHREAD && (__TBB_SOURCE_DIRECTLY_INCLUDED || __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND)
1976
1977 static ShutdownSync shutdownSync;
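
/*
 * Intended usage sketch (mirrors the calls made later in this file; releaseTls() is a
 * hypothetical stand-in for the per-thread cleanup):
 *
 *     // in the pthread key destructor of a worker thread:
 *     if (shutdownSync.threadDtorStart()) {
 *         releaseTls();                  // per-thread cleanup while unloading is held off
 *         shutdownSync.threadDtorDone();
 *     }
 *
 *     // once, at process shutdown:
 *     shutdownSync.processExit();        // waits for in-flight dtors, then disables new ones
 */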
1978
isMallocInitialized()1979 inline bool isMallocInitialized() {
1980 // Load must have acquire fence; otherwise thread taking "initialized" path
1981 // might perform textually later loads *before* mallocInitialized becomes 2.
1982 return 2 == FencedLoad(mallocInitialized);
1983 }
1984
isMallocInitializedExt()1985 bool isMallocInitializedExt() {
1986 return isMallocInitialized();
1987 }
1988
1989 /* Caller is responsible for ensuring this routine is called exactly once. */
MallocInitializeITT()1990 extern "C" void MallocInitializeITT() {
1991 #if DO_ITT_NOTIFY
1992 if (!usedBySrcIncluded)
1993 tbb::internal::__TBB_load_ittnotify();
1994 #endif
1995 }
1996
initDefaultPool()1997 void MemoryPool::initDefaultPool() {
1998 hugePages.init();
1999 }
2000
2001 /*
2002 * Allocator initialization routine;
2003 * it is called lazily on the very first scalable_malloc call.
2004 */
initMemoryManager()2005 static bool initMemoryManager()
2006 {
2007 TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n",
2008 sizeof(Block), sizeof(uintptr_t) ));
2009 MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT );
2010 MALLOC_ASSERT( sizeof(FreeObject) == sizeof(void*), ASSERT_TEXT );
2011 MALLOC_ASSERT( isAligned(defaultMemPool, sizeof(intptr_t)),
2012 "Memory pool must be void*-aligned for atomic to work over aligned arguments.");
2013
2014 #if USE_WINTHREAD
2015     const size_t granularity = 64*1024; // granularity of VirtualAlloc
2016 #else
2017 // POSIX.1-2001-compliant way to get page size
2018 const size_t granularity = sysconf(_SC_PAGESIZE);
2019 #endif
2020 if (!defaultMemPool) {
2021         // Do not rely on static constructors; do the assignment explicitly, in case
2022         // the library's static data section is not initialized at this call yet.
2023 defaultMemPool = (MemoryPool*)defaultMemPool_space;
2024 }
2025 bool initOk = defaultMemPool->
2026 extMemPool.init(0, NULL, NULL, granularity,
2027 /*keepAllMemory=*/false, /*fixedPool=*/false);
2028 // TODO: extMemPool.init() to not allocate memory
2029 if (!initOk || !initBackRefMaster(&defaultMemPool->extMemPool.backend) || !ThreadId::init())
2030 return false;
2031 MemoryPool::initDefaultPool();
2032 // init() is required iff initMemoryManager() is called
2033 // after mallocProcessShutdownNotification()
2034 shutdownSync.init();
2035 #if COLLECT_STATISTICS
2036 initStatisticsCollection();
2037 #endif
2038 return true;
2039 }
2040
GetBoolEnvironmentVariable(const char * name)2041 static bool GetBoolEnvironmentVariable(const char* name) {
2042 return tbb::internal::GetBoolEnvironmentVariable(name);
2043 }
2044
2045 //! Ensures that initMemoryManager() is called once and only once.
2046 /** Does not return until initMemoryManager() has been completed by a thread.
2047 There is no need to call this routine if mallocInitialized==2 . */
doInitialization()2048 static bool doInitialization()
2049 {
2050 MallocMutex::scoped_lock lock( initMutex );
2051 if (mallocInitialized!=2) {
2052 MALLOC_ASSERT( mallocInitialized==0, ASSERT_TEXT );
2053 mallocInitialized = 1;
2054 RecursiveMallocCallProtector scoped;
2055 if (!initMemoryManager()) {
2056 mallocInitialized = 0; // restore and out
2057 return false;
2058 }
2059 #ifdef MALLOC_EXTRA_INITIALIZATION
2060 MALLOC_EXTRA_INITIALIZATION;
2061 #endif
2062 #if MALLOC_CHECK_RECURSION
2063 RecursiveMallocCallProtector::detectNaiveOverload();
2064 #endif
2065 MALLOC_ASSERT( mallocInitialized==1, ASSERT_TEXT );
2066 // Store must have release fence, otherwise mallocInitialized==2
2067 // might become remotely visible before side effects of
2068 // initMemoryManager() become remotely visible.
2069 FencedStore( mallocInitialized, 2 );
2070 if( GetBoolEnvironmentVariable("TBB_VERSION") ) {
2071 fputs(VersionString+1,stderr);
2072 hugePages.printStatus();
2073 }
2074 }
2075 /* It can't be 0 or I would have initialized it */
2076 MALLOC_ASSERT( mallocInitialized==2, ASSERT_TEXT );
2077 return true;
2078 }
2079
2080 /********* End library initialization *************/
2081
2082 /********* The malloc show begins *************/
2083
2084
allocateFromFreeList()2085 FreeObject *Block::allocateFromFreeList()
2086 {
2087 FreeObject *result;
2088
2089 if (!freeList) return NULL;
2090
2091 result = freeList;
2092 MALLOC_ASSERT( result, ASSERT_TEXT );
2093
2094 freeList = result->next;
2095 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
2096 allocatedCount++;
2097 STAT_increment(getThreadId(), getIndex(objectSize), allocFreeListUsed);
2098
2099 return result;
2100 }
2101
allocateFromBumpPtr()2102 FreeObject *Block::allocateFromBumpPtr()
2103 {
2104 FreeObject *result = bumpPtr;
2105 if (result) {
2106 bumpPtr = (FreeObject *) ((uintptr_t) bumpPtr - objectSize);
2107 if ( (uintptr_t)bumpPtr < (uintptr_t)this+sizeof(Block) ) {
2108 bumpPtr = NULL;
2109 }
2110 MALLOC_ASSERT( allocatedCount < (slabSize-sizeof(Block))/objectSize, ASSERT_TEXT );
2111 allocatedCount++;
2112 STAT_increment(getThreadId(), getIndex(objectSize), allocBumpPtrUsed);
2113 }
2114 return result;
2115 }
2116
allocate()2117 inline FreeObject* Block::allocate()
2118 {
2119 MALLOC_ASSERT( isOwnedByCurrentThread(), ASSERT_TEXT );
2120
2121     /* for better cache locality, first look in the free list. */
2122 if ( FreeObject *result = allocateFromFreeList() ) {
2123 return result;
2124 }
2125 MALLOC_ASSERT( !freeList, ASSERT_TEXT );
2126
2127 /* if free list is empty, try thread local bump pointer allocation. */
2128 if ( FreeObject *result = allocateFromBumpPtr() ) {
2129 return result;
2130 }
2131 MALLOC_ASSERT( !bumpPtr, ASSERT_TEXT );
2132
2133 /* the block is considered full. */
2134 isFull = true;
2135 return NULL;
2136 }
2137
findObjectSize(void * object) const2138 size_t Block::findObjectSize(void *object) const
2139 {
2140 size_t blSize = getSize();
2141 #if MALLOC_CHECK_RECURSION
2142     // Currently, there are no aligned allocations from startup blocks,
2143 // so we can return just StartupBlock::msize().
2144 // TODO: This must be extended if we add aligned allocation from startup blocks.
2145 if (!blSize)
2146 return StartupBlock::msize(object);
2147 #endif
2148 // object can be aligned, so real size can be less than block's
2149 size_t size =
2150 blSize - ((uintptr_t)object - (uintptr_t)findObjectToFree(object));
2151 MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT);
2152 return size;
2153 }
2154
moveBlockToFront(Block * block)2155 void Bin::moveBlockToFront(Block *block)
2156 {
2157 /* move the block to the front of the bin */
2158 if (block == activeBlk) return;
2159 outofTLSBin(block);
2160 pushTLSBin(block);
2161 }
2162
processEmptyBlock(Block * block,bool poolTheBlock)2163 void Bin::processEmptyBlock(Block *block, bool poolTheBlock)
2164 {
2165 if (block != activeBlk) {
2166 /* We are not using this block; return it to the pool */
2167 outofTLSBin(block);
2168 block->getMemPool()->returnEmptyBlock(block, poolTheBlock);
2169 } else {
2170 /* all objects are free - let's restore the bump pointer */
2171 block->restoreBumpPtr();
2172 }
2173 }
2174
2175 template<int LOW_MARK, int HIGH_MARK>
put(LargeMemoryBlock * object,ExtMemoryPool * extMemPool)2176 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::put(LargeMemoryBlock *object, ExtMemoryPool *extMemPool)
2177 {
2178 const size_t size = object->unalignedSize;
2179     // do not spoil the cache with a too-large object, as that can cause its complete cleanup
2180 if (size > MAX_TOTAL_SIZE)
2181 return false;
2182 LargeMemoryBlock *localHead = (LargeMemoryBlock*)AtomicFetchStore(&head, 0);
2183
2184 object->prev = NULL;
2185 object->next = localHead;
2186 if (localHead)
2187 localHead->prev = object;
2188 else {
2189         // these might not have been reset when the local cache was stolen; correct them
2190 totalSize = 0;
2191 numOfBlocks = 0;
2192 tail = object;
2193 }
2194 localHead = object;
2195 totalSize += size;
2196 numOfBlocks++;
2197     // must meet both the size and the number-of-cached-objects constraints
2198 if (totalSize > MAX_TOTAL_SIZE || numOfBlocks >= HIGH_MARK) {
2199         // scan from the tail until the constraints are met
2200 while (totalSize > MAX_TOTAL_SIZE || numOfBlocks > LOW_MARK) {
2201 totalSize -= tail->unalignedSize;
2202 numOfBlocks--;
2203 tail = tail->prev;
2204 }
2205 LargeMemoryBlock *headToRelease = tail->next;
2206 tail->next = NULL;
2207
2208 extMemPool->freeLargeObjectList(headToRelease);
2209 }
2210
2211 FencedStore((intptr_t&)head, (intptr_t)localHead);
2212 return true;
2213 }
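
/*
 * Cache-trimming sketch: with hypothetical template arguments LOW_MARK == 8 and
 * HIGH_MARK == 32, put() keeps prepending to the head of the doubly-linked list until
 * either the 32nd cached block arrives or totalSize exceeds MAX_TOTAL_SIZE; it then walks
 * back from the tail until at most 8 blocks remain and totalSize fits the limit, detaches
 * the remaining tail, and hands it to extMemPool->freeLargeObjectList() in one call.
 */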
2214
2215 template<int LOW_MARK, int HIGH_MARK>
get(size_t size)2216 LargeMemoryBlock *LocalLOCImpl<LOW_MARK, HIGH_MARK>::get(size_t size)
2217 {
2218 LargeMemoryBlock *localHead, *res=NULL;
2219
2220 if (size > MAX_TOTAL_SIZE)
2221 return NULL;
2222
2223 if (!head || (localHead = (LargeMemoryBlock*)AtomicFetchStore(&head, 0)) == NULL) {
2224 // do not restore totalSize, numOfBlocks and tail at this point,
2225 // as they are used only in put(), where they must be restored
2226 return NULL;
2227 }
2228
2229 for (LargeMemoryBlock *curr = localHead; curr; curr=curr->next) {
2230 if (curr->unalignedSize == size) {
2231 res = curr;
2232 if (curr->next)
2233 curr->next->prev = curr->prev;
2234 else
2235 tail = curr->prev;
2236 if (curr != localHead)
2237 curr->prev->next = curr->next;
2238 else
2239 localHead = curr->next;
2240 totalSize -= size;
2241 numOfBlocks--;
2242 break;
2243 }
2244 }
2245 FencedStore((intptr_t&)head, (intptr_t)localHead);
2246 return res;
2247 }
2248
2249 template<int LOW_MARK, int HIGH_MARK>
externalCleanup(ExtMemoryPool * extMemPool)2250 bool LocalLOCImpl<LOW_MARK, HIGH_MARK>::externalCleanup(ExtMemoryPool *extMemPool)
2251 {
2252 if (LargeMemoryBlock *localHead = (LargeMemoryBlock*)AtomicFetchStore(&head, 0)) {
2253 extMemPool->freeLargeObjectList(localHead);
2254 return true;
2255 }
2256 return false;
2257 }
2258
getFromLLOCache(TLSData * tls,size_t size,size_t alignment)2259 void *MemoryPool::getFromLLOCache(TLSData* tls, size_t size, size_t alignment)
2260 {
2261 LargeMemoryBlock *lmb = NULL;
2262
2263 size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
2264 size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+alignment);
2265 if (allocationSize < size) // allocationSize is wrapped around after alignToBin
2266 return NULL;
2267 MALLOC_ASSERT(allocationSize >= alignment, "Overflow must be checked before.");
2268
2269 if (tls) {
2270 tls->markUsed();
2271 lmb = tls->lloc.get(allocationSize);
2272 }
2273 if (!lmb)
2274 lmb = extMemPool.mallocLargeObject(this, allocationSize);
2275
2276 if (lmb) {
2277         // when shuffling, we assume that the alignment offset guarantees
2278         // that different cache lines are in use
2279 MALLOC_ASSERT(alignment >= estimatedCacheLineSize, ASSERT_TEXT);
2280
2281 void *alignedArea = (void*)alignUp((uintptr_t)lmb+headersSize, alignment);
2282 uintptr_t alignedRight =
2283 alignDown((uintptr_t)lmb+lmb->unalignedSize - size, alignment);
2284         // Is there some room to shuffle the object between cache lines?
2285 // Note that alignedRight and alignedArea are aligned at alignment.
2286 unsigned ptrDelta = alignedRight - (uintptr_t)alignedArea;
2287 if (ptrDelta && tls) { // !tls is cold path
2288 // for the hot path of alignment==estimatedCacheLineSize,
2289 // allow compilers to use shift for division
2290 // (since estimatedCacheLineSize is a power-of-2 constant)
2291 unsigned numOfPossibleOffsets = alignment == estimatedCacheLineSize?
2292 ptrDelta / estimatedCacheLineSize :
2293 ptrDelta / alignment;
2294 unsigned myCacheIdx = ++tls->currCacheIdx;
2295 unsigned offset = myCacheIdx % numOfPossibleOffsets;
2296
2297 // Move object to a cache line with an offset that is different from
2298 // previous allocation. This supposedly allows us to use cache
2299 // associativity more efficiently.
2300 alignedArea = (void*)((uintptr_t)alignedArea + offset*alignment);
2301 }
2302 MALLOC_ASSERT((uintptr_t)lmb+lmb->unalignedSize >=
2303 (uintptr_t)alignedArea+size, "Object doesn't fit the block.");
2304 LargeObjectHdr *header = (LargeObjectHdr*)alignedArea-1;
2305 header->memoryBlock = lmb;
2306 header->backRefIdx = lmb->backRefIdx;
2307 setBackRef(header->backRefIdx, header);
2308
2309 lmb->objectSize = size;
2310
2311 MALLOC_ASSERT( isLargeObject<unknownMem>(alignedArea), ASSERT_TEXT );
2312 MALLOC_ASSERT( isAligned(alignedArea, alignment), ASSERT_TEXT );
2313
2314 return alignedArea;
2315 }
2316 return NULL;
2317 }
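
/*
 * Worked example of the shuffle above (a sketch; assumes estimatedCacheLineSize == 64):
 * with alignment == 64 and ptrDelta == 256 there are 4 possible placements, so
 * successive allocations from the same thread land at offsets 0, 64, 128 and 192 bytes
 * from alignedArea (rotating with tls->currCacheIdx), spreading objects over cache sets.
 */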
2318
putToLLOCache(TLSData * tls,void * object)2319 void MemoryPool::putToLLOCache(TLSData *tls, void *object)
2320 {
2321 LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
2322 // overwrite backRefIdx to simplify double free detection
2323 header->backRefIdx = BackRefIdx();
2324
2325 if (tls) {
2326 tls->markUsed();
2327 if (tls->lloc.put(header->memoryBlock, &extMemPool))
2328 return;
2329 }
2330 extMemPool.freeLargeObject(header->memoryBlock);
2331 }
2332
2333 /*
2334 * All aligned allocations fall into one of the following categories:
2335 * 1. if both request size and alignment are <= maxSegregatedObjectSize,
2336 * we just align the size up, and request this amount, because for every size
2337 * aligned to some power of 2, the allocated object is at least that aligned.
2338  * 2. for size<minLargeObjectSize, check whether the already guaranteed fittingAlignment is enough.
2339  * 3. if size+alignment<minLargeObjectSize, we take an object of fittingSizeN and align
2340  *    its address up; given such a pointer, scalable_free can find the real object.
2341 * Wrapping of size+alignment is impossible because maximal allowed
2342 * alignment plus minLargeObjectSize can't lead to wrapping.
2343 * 4. otherwise, aligned large object is allocated.
2344 */
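
/*
 * Example for category 3 (a sketch; the concrete thresholds are the constants named above):
 * a request such as scalable_aligned_malloc(100, 256), assuming 256 > maxSegregatedObjectSize
 * and 100 + 256 < minLargeObjectSize, takes a small object of size+alignment == 356 bytes
 * (rounded up to a fitting bin) and returns its address aligned up to 256; scalable_free()
 * later recovers the real object via findAllocatedObject().
 */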
allocateAligned(MemoryPool * memPool,size_t size,size_t alignment)2345 static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment)
2346 {
2347 MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT );
2348
2349 if (!isMallocInitialized())
2350 if (!doInitialization())
2351 return NULL;
2352
2353 void *result;
2354 if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize)
2355 result = internalPoolMalloc(memPool, alignUp(size? size: sizeof(size_t), alignment));
2356 else if (size<minLargeObjectSize) {
2357 if (alignment<=fittingAlignment)
2358 result = internalPoolMalloc(memPool, size);
2359 else if (size+alignment < minLargeObjectSize) {
2360 void *unaligned = internalPoolMalloc(memPool, size+alignment);
2361 if (!unaligned) return NULL;
2362 result = alignUp(unaligned, alignment);
2363 } else
2364 goto LargeObjAlloc;
2365 } else {
2366 LargeObjAlloc:
2367 TLSData *tls = memPool->getTLS(/*create=*/true);
2368         // take into account only alignments that are higher than the natural one
2369 result =
2370 memPool->getFromLLOCache(tls, size, largeObjectAlignment>alignment?
2371 largeObjectAlignment: alignment);
2372 }
2373
2374 MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT );
2375 return result;
2376 }
2377
reallocAligned(MemoryPool * memPool,void * ptr,size_t newSize,size_t alignment=0)2378 static void *reallocAligned(MemoryPool *memPool, void *ptr,
2379 size_t newSize, size_t alignment = 0)
2380 {
2381 void *result;
2382 size_t copySize;
2383
2384 if (isLargeObject<ourMem>(ptr)) {
2385 LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
2386 copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb);
2387
2388 // Apply different strategies if size decreases
2389 if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) {
2390
2391             // For huge objects (that do not fit in the backend cache), keep the same space unless
2392             // the new size is less than half of the old one
2393 bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize();
2394 size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0;
2395 if (newSize > threshold) {
2396 lmb->objectSize = newSize;
2397 return ptr;
2398 }
2399 // TODO: For large objects suitable for the backend cache,
2400 // split out the excessive part and put it to the backend.
2401 }
2402 // Reallocate for real
2403 copySize = lmb->objectSize;
2404 #if BACKEND_HAS_MREMAP
2405 if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize,
2406 alignment < largeObjectAlignment ? largeObjectAlignment : alignment))
2407 return r;
2408 #endif
2409 result = alignment ? allocateAligned(memPool, newSize, alignment) :
2410 internalPoolMalloc(memPool, newSize);
2411
2412 } else {
2413 Block* block = (Block *)alignDown(ptr, slabSize);
2414 copySize = block->findObjectSize(ptr);
2415
2416         // TODO: Move the object to another bin if the size decreases and the current bin is "empty enough".
2417         // Currently, if the size decreases, the old pointer is returned.
2418 if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) {
2419 return ptr;
2420 } else {
2421 result = alignment ? allocateAligned(memPool, newSize, alignment) :
2422 internalPoolMalloc(memPool, newSize);
2423 }
2424 }
2425 if (result) {
2426 memcpy(result, ptr, copySize < newSize ? copySize : newSize);
2427 internalPoolFree(memPool, ptr, 0);
2428 }
2429 return result;
2430 }
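
/*
 * Shrink-threshold example for the large-object path above (a sketch): if the existing
 * block is huge (copySize larger than the backend's maximal binned size), say 10 MB, the
 * threshold is 5 MB; a realloc down to 6 MB just updates lmb->objectSize and returns the
 * same pointer, while a realloc down to 4 MB falls through to a real reallocation and copy.
 */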
2431
2432 /* A predicate checks if an object is properly placed inside its block */
isProperlyPlaced(const void * object) const2433 inline bool Block::isProperlyPlaced(const void *object) const
2434 {
2435 return 0 == ((uintptr_t)this + slabSize - (uintptr_t)object) % objectSize;
2436 }
2437
2438 /* Finds the real object inside the block */
findAllocatedObject(const void * address) const2439 FreeObject *Block::findAllocatedObject(const void *address) const
2440 {
2441 // calculate offset from the end of the block space
2442 uint16_t offset = (uintptr_t)this + slabSize - (uintptr_t)address;
2443 MALLOC_ASSERT( offset<=slabSize-sizeof(Block), ASSERT_TEXT );
2444 // find offset difference from a multiple of allocation size
2445 offset %= objectSize;
2446 // and move the address down to where the real object starts.
2447 return (FreeObject*)((uintptr_t)address - (offset? objectSize-offset: 0));
2448 }
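
/*
 * Worked example: for objectSize == 80, objects sit at multiples of 80 counted from the
 * end of the slab. An aligned pointer 32 bytes past an object's start gives offset % 80 == 48,
 * so the function returns address - (80 - 48) == address - 32, i.e. exactly the object's start.
 */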
2449
2450 /*
2451  * A bad dereference caused by a foreign pointer is possible only here, not earlier in the call chain.
2452  * A separate function isolates the SEH code, as it has a bad influence on compiler optimization.
2453 */
safer_dereference(const BackRefIdx * ptr)2454 static inline BackRefIdx safer_dereference (const BackRefIdx *ptr)
2455 {
2456 BackRefIdx id;
2457 #if _MSC_VER
2458 __try {
2459 #endif
2460 id = *ptr;
2461 #if _MSC_VER
2462 } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION?
2463 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) {
2464 id = BackRefIdx();
2465 }
2466 #endif
2467 return id;
2468 }
2469
2470 template<MemoryOrigin memOrigin>
isLargeObject(void * object)2471 bool isLargeObject(void *object)
2472 {
2473 if (!isAligned(object, largeObjectAlignment))
2474 return false;
2475 LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
2476 BackRefIdx idx = (memOrigin == unknownMem) ?
2477 safer_dereference(&header->backRefIdx) : header->backRefIdx;
2478
2479 return idx.isLargeObject()
2480            // in a valid LargeObjectHdr, memoryBlock is not NULL
2481            && header->memoryBlock
2482            // in a valid LargeObjectHdr, memoryBlock points somewhere before the header
2483            // TODO: add a stricter check
2484 && (uintptr_t)header->memoryBlock < (uintptr_t)header
2485 && getBackRef(idx) == header;
2486 }
2487
isSmallObject(void * ptr)2488 static inline bool isSmallObject (void *ptr)
2489 {
2490 Block* expectedBlock = (Block*)alignDown(ptr, slabSize);
2491 const BackRefIdx* idx = expectedBlock->getBackRefIdx();
2492
2493 bool isSmall = expectedBlock == getBackRef(safer_dereference(idx));
2494 if (isSmall)
2495 expectedBlock->checkFreePrecond(ptr);
2496 return isSmall;
2497 }
2498
2499 /**** Check if an object was allocated by scalable_malloc ****/
isRecognized(void * ptr)2500 static inline bool isRecognized (void* ptr)
2501 {
2502 return defaultMemPool->extMemPool.backend.ptrCanBeValid(ptr) &&
2503 (isLargeObject<unknownMem>(ptr) || isSmallObject(ptr));
2504 }
2505
freeSmallObject(void * object)2506 static inline void freeSmallObject(void *object)
2507 {
2508 /* mask low bits to get the block */
2509 Block *block = (Block *)alignDown(object, slabSize);
2510 block->checkFreePrecond(object);
2511
2512 #if MALLOC_CHECK_RECURSION
2513 if (block->isStartupAllocObject()) {
2514 ((StartupBlock *)block)->free(object);
2515 return;
2516 }
2517 #endif
2518 if (block->isOwnedByCurrentThread()) {
2519 block->freeOwnObject(object);
2520     } else { /* Slower path: add to the shared list; allocatedCount is updated by the owner thread in malloc. */
2521 FreeObject *objectToFree = block->findObjectToFree(object);
2522 block->freePublicObject(objectToFree);
2523 }
2524 }
2525
internalPoolMalloc(MemoryPool * memPool,size_t size)2526 static void *internalPoolMalloc(MemoryPool* memPool, size_t size)
2527 {
2528 Bin* bin;
2529 Block * mallocBlock;
2530
2531 if (!memPool) return NULL;
2532
2533 if (!size) size = sizeof(size_t);
2534
2535 TLSData *tls = memPool->getTLS(/*create=*/true);
2536
2537 /* Allocate a large object */
2538 if (size >= minLargeObjectSize)
2539 return memPool->getFromLLOCache(tls, size, largeObjectAlignment);
2540
2541 if (!tls) return NULL;
2542
2543 tls->markUsed();
2544 /*
2545      * Get the element of the thread-local array corresponding to the given size;
2546      * it keeps a pointer to the active block for allocations of this size
2547 */
2548 bin = tls->getAllocationBin(size);
2549 if ( !bin ) return NULL;
2550
2551 /* Get a block to try to allocate in. */
2552 for( mallocBlock = bin->getActiveBlock(); mallocBlock;
2553 mallocBlock = bin->setPreviousBlockActive() ) // the previous block should be empty enough
2554 {
2555 if( FreeObject *result = mallocBlock->allocate() )
2556 return result;
2557 }
2558
2559 /*
2560 * else privatize publicly freed objects in some block and allocate from it
2561 */
2562 mallocBlock = bin->getPrivatizedFreeListBlock();
2563 if (mallocBlock) {
2564 MALLOC_ASSERT( mallocBlock->freeListNonNull(), ASSERT_TEXT );
2565 if ( FreeObject *result = mallocBlock->allocateFromFreeList() )
2566 return result;
2567 /* Else something strange happened, need to retry from the beginning; */
2568 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" ));
2569 return internalPoolMalloc(memPool, size);
2570 }
2571
2572 /*
2573 * no suitable own blocks, try to get a partial block that some other thread has discarded.
2574 */
2575 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size);
2576 while (mallocBlock) {
2577 bin->pushTLSBin(mallocBlock);
2578 bin->setActiveBlock(mallocBlock); // TODO: move under the below condition?
2579 if( FreeObject *result = mallocBlock->allocate() )
2580 return result;
2581 mallocBlock = memPool->extMemPool.orphanedBlocks.get(tls, size);
2582 }
2583
2584 /*
2585 * else try to get a new empty block
2586 */
2587 mallocBlock = memPool->getEmptyBlock(size);
2588 if (mallocBlock) {
2589 bin->pushTLSBin(mallocBlock);
2590 bin->setActiveBlock(mallocBlock);
2591 if( FreeObject *result = mallocBlock->allocate() )
2592 return result;
2593 /* Else something strange happened, need to retry from the beginning; */
2594 TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" ));
2595 return internalPoolMalloc(memPool, size);
2596 }
2597 /*
2598 * else nothing works so return NULL
2599 */
2600 TRACEF(( "[ScalableMalloc trace] No memory found, returning NULL.\n" ));
2601 return NULL;
2602 }
2603
2604 // When size==0 (i.e. unknown), detect here whether the object is large.
2605 // When the size is known and < minLargeObjectSize, we still need to check
2606 // whether the actual object is large, because large objects might be used
2607 // for aligned small allocations.
internalPoolFree(MemoryPool * memPool,void * object,size_t size)2608 static bool internalPoolFree(MemoryPool *memPool, void *object, size_t size)
2609 {
2610 if (!memPool || !object) return false;
2611
2612     // The library is initialized at the first allocation call, so a release while
2613     // it is not initialized means a foreign object is being released.
2614 MALLOC_ASSERT(isMallocInitialized(), ASSERT_TEXT);
2615 MALLOC_ASSERT(memPool->extMemPool.userPool() || isRecognized(object),
2616 "Invalid pointer during object releasing is detected.");
2617
2618 if (size >= minLargeObjectSize || isLargeObject<ourMem>(object))
2619 memPool->putToLLOCache(memPool->getTLS(/*create=*/false), object);
2620 else
2621 freeSmallObject(object);
2622 return true;
2623 }
2624
internalMalloc(size_t size)2625 static void *internalMalloc(size_t size)
2626 {
2627 if (!size) size = sizeof(size_t);
2628
2629 #if MALLOC_CHECK_RECURSION
2630 if (RecursiveMallocCallProtector::sameThreadActive())
2631 return size<minLargeObjectSize? StartupBlock::allocate(size) :
2632 // nested allocation, so skip tls
2633 (FreeObject*)defaultMemPool->getFromLLOCache(NULL, size, slabSize);
2634 #endif
2635
2636 if (!isMallocInitialized())
2637 if (!doInitialization())
2638 return NULL;
2639 return internalPoolMalloc(defaultMemPool, size);
2640 }
2641
internalFree(void * object)2642 static void internalFree(void *object)
2643 {
2644 internalPoolFree(defaultMemPool, object, 0);
2645 }
2646
internalMsize(void * ptr)2647 static size_t internalMsize(void* ptr)
2648 {
2649 MALLOC_ASSERT(ptr, "Invalid pointer passed to internalMsize");
2650 if (isLargeObject<ourMem>(ptr)) {
2651 // TODO: return the maximum memory size, that can be written to this object
2652 LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock;
2653 return lmb->objectSize;
2654 } else {
2655 Block *block = (Block*)alignDown(ptr, slabSize);
2656 return block->findObjectSize(ptr);
2657 }
2658 }
2659
2660 } // namespace internal
2661
2662 using namespace rml::internal;
2663
2664 // legacy entry point saved for compatibility with binaries compiled
2665 // with pre-6003 versions of TBB
pool_create(intptr_t pool_id,const MemPoolPolicy * policy)2666 rml::MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy *policy)
2667 {
2668 rml::MemoryPool *pool;
2669 MemPoolPolicy pol(policy->pAlloc, policy->pFree, policy->granularity);
2670
2671 pool_create_v1(pool_id, &pol, &pool);
2672 return pool;
2673 }
2674
pool_create_v1(intptr_t pool_id,const MemPoolPolicy * policy,rml::MemoryPool ** pool)2675 rml::MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy,
2676 rml::MemoryPool **pool)
2677 {
2678 if ( !policy->pAlloc || policy->version<MemPoolPolicy::TBBMALLOC_POOL_VERSION
2679 // empty pFree allowed only for fixed pools
2680 || !(policy->fixedPool || policy->pFree)) {
2681 *pool = NULL;
2682 return INVALID_POLICY;
2683 }
2684 if ( policy->version>MemPoolPolicy::TBBMALLOC_POOL_VERSION // future versions are not supported
2685 // new flags can be added in place of reserved, but default
2686 // behaviour must be supported by this version
2687 || policy->reserved ) {
2688 *pool = NULL;
2689 return UNSUPPORTED_POLICY;
2690 }
2691 if (!isMallocInitialized())
2692 if (!doInitialization()) {
2693 *pool = NULL;
2694 return NO_MEMORY;
2695 }
2696 rml::internal::MemoryPool *memPool =
2697 (rml::internal::MemoryPool*)internalMalloc((sizeof(rml::internal::MemoryPool)));
2698 if (!memPool) {
2699 *pool = NULL;
2700 return NO_MEMORY;
2701 }
2702 memset(memPool, 0, sizeof(rml::internal::MemoryPool));
2703 if (!memPool->init(pool_id, policy)) {
2704 internalFree(memPool);
2705 *pool = NULL;
2706 return NO_MEMORY;
2707 }
2708
2709 *pool = (rml::MemoryPool*)memPool;
2710 return POOL_OK;
2711 }
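
/*
 * Minimal usage sketch of the pool API (not part of the library; assumes the
 * rawAllocType/rawFreeType callback signatures declared in scalable_allocator.h,
 * with myRawAlloc/myRawFree being hypothetical user-provided callbacks):
 *
 *     rml::MemoryPool *pool;
 *     MemPoolPolicy policy(myRawAlloc, myRawFree, 0);   // pAlloc, pFree, granularity
 *     if (pool_create_v1(0, &policy, &pool) == POOL_OK) {   // pool_id is 0 here
 *         void *p = pool_malloc(pool, 100);
 *         pool_free(pool, p);
 *         pool_destroy(pool);
 *     }
 */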
2712
pool_destroy(rml::MemoryPool * memPool)2713 bool pool_destroy(rml::MemoryPool* memPool)
2714 {
2715 if (!memPool) return false;
2716 bool ret = ((rml::internal::MemoryPool*)memPool)->destroy();
2717 internalFree(memPool);
2718
2719 return ret;
2720 }
2721
pool_reset(rml::MemoryPool * memPool)2722 bool pool_reset(rml::MemoryPool* memPool)
2723 {
2724 if (!memPool) return false;
2725
2726 return ((rml::internal::MemoryPool*)memPool)->reset();
2727 }
2728
pool_malloc(rml::MemoryPool * mPool,size_t size)2729 void *pool_malloc(rml::MemoryPool* mPool, size_t size)
2730 {
2731 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size);
2732 }
2733
pool_realloc(rml::MemoryPool * mPool,void * object,size_t size)2734 void *pool_realloc(rml::MemoryPool* mPool, void *object, size_t size)
2735 {
2736 if (!object)
2737 return internalPoolMalloc((rml::internal::MemoryPool*)mPool, size);
2738 if (!size) {
2739 internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0);
2740 return NULL;
2741 }
2742 return reallocAligned((rml::internal::MemoryPool*)mPool, object, size, 0);
2743 }
2744
pool_aligned_malloc(rml::MemoryPool * mPool,size_t size,size_t alignment)2745 void *pool_aligned_malloc(rml::MemoryPool* mPool, size_t size, size_t alignment)
2746 {
2747 if (!isPowerOfTwo(alignment) || 0==size)
2748 return NULL;
2749
2750 return allocateAligned((rml::internal::MemoryPool*)mPool, size, alignment);
2751 }
2752
pool_aligned_realloc(rml::MemoryPool * memPool,void * ptr,size_t size,size_t alignment)2753 void *pool_aligned_realloc(rml::MemoryPool* memPool, void *ptr, size_t size, size_t alignment)
2754 {
2755 if (!isPowerOfTwo(alignment))
2756 return NULL;
2757 rml::internal::MemoryPool *mPool = (rml::internal::MemoryPool*)memPool;
2758 void *tmp;
2759
2760 if (!ptr)
2761 tmp = allocateAligned(mPool, size, alignment);
2762 else if (!size) {
2763 internalPoolFree(mPool, ptr, 0);
2764 return NULL;
2765 } else
2766 tmp = reallocAligned(mPool, ptr, size, alignment);
2767
2768 return tmp;
2769 }
2770
pool_free(rml::MemoryPool * mPool,void * object)2771 bool pool_free(rml::MemoryPool *mPool, void *object)
2772 {
2773 return internalPoolFree((rml::internal::MemoryPool*)mPool, object, 0);
2774 }
2775
pool_identify(void * object)2776 rml::MemoryPool *pool_identify(void *object)
2777 {
2778 rml::internal::MemoryPool *pool;
2779 if (isLargeObject<ourMem>(object)) {
2780 LargeObjectHdr *header = (LargeObjectHdr*)object - 1;
2781 pool = header->memoryBlock->pool;
2782 } else {
2783 Block *block = (Block*)alignDown(object, slabSize);
2784 pool = block->getMemPool();
2785 }
2786 // do not return defaultMemPool, as it can't be used in pool_free() etc
2787 __TBB_ASSERT_RELEASE(pool!=defaultMemPool,
2788 "rml::pool_identify() can't be used for scalable_malloc() etc results.");
2789 return (rml::MemoryPool*)pool;
2790 }
2791
pool_msize(rml::MemoryPool * mPool,void * object)2792 size_t pool_msize(rml::MemoryPool *mPool, void* object)
2793 {
2794 if (object) {
2795         // No assert for object recognition, because objects allocated from a non-default
2796         // memory pool do not participate in range checking and small objects do not have
2797         // valid back references. Instead, check that the object belongs to the specified memory pool.
2798 MALLOC_ASSERT_EX(mPool == pool_identify(object), "Object does not belong to the specified pool");
2799 return internalMsize(object);
2800 }
2801 errno = EINVAL;
2802 // Unlike _msize, return 0 in case of parameter error.
2803         // Returning size_t(-1) looks more like a way to get into trouble.
2804 return 0;
2805 }
2806
2807 } // namespace rml
2808
2809 using namespace rml::internal;
2810
2811 #if MALLOC_TRACE
2812 static unsigned int threadGoingDownCount = 0;
2813 #endif
2814
2815 /*
2816 * When a thread is shutting down this routine should be called to remove all the thread ids
2817 * from the malloc blocks and replace them with a NULL thread id.
2818 *
2819 * For pthreads, the function is set as a callback in pthread_key_create for TLS bin.
2820 * It will be automatically called at thread exit with the key value as the argument,
2821 * unless that value is NULL.
2822 * For Windows, it is called from DllMain( DLL_THREAD_DETACH ).
2823 *
2824 * However neither of the above is called for the main process thread, so the routine
2825 * also needs to be called during the process shutdown.
2826 *
2827 */
2828 // TODO: Consider making this function part of class MemoryPool.
doThreadShutdownNotification(TLSData * tls,bool main_thread)2829 void doThreadShutdownNotification(TLSData* tls, bool main_thread)
2830 {
2831 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n",
2832 getThreadId(), threadGoingDownCount++ ));
2833
2834 #if USE_PTHREAD
2835 if (tls) {
2836 if (!shutdownSync.threadDtorStart()) return;
2837 tls->getMemPool()->onThreadShutdown(tls);
2838 shutdownSync.threadDtorDone();
2839 } else
2840 #endif
2841 {
2842 suppress_unused_warning(tls); // not used on Windows
2843 // The default pool is safe to use at this point:
2844 // on Linux, only the main thread can go here before destroying defaultMemPool;
2845 // on Windows, shutdown is synchronized via loader lock and isMallocInitialized().
2846 // See also __TBB_mallocProcessShutdownNotification()
2847 defaultMemPool->onThreadShutdown(defaultMemPool->getTLS(/*create=*/false));
2848 // Take lock to walk through other pools; but waiting might be dangerous at this point
2849 // (e.g. on Windows the main thread might deadlock)
2850 bool locked;
2851 MallocMutex::scoped_lock lock(MemoryPool::memPoolListLock, /*wait=*/!main_thread, &locked);
2852 if (locked) { // the list is safe to process
2853 for (MemoryPool *memPool = defaultMemPool->next; memPool; memPool = memPool->next)
2854 memPool->onThreadShutdown(memPool->getTLS(/*create=*/false));
2855 }
2856 }
2857
2858 TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() ));
2859 }
2860
2861 #if USE_PTHREAD
mallocThreadShutdownNotification(void * arg)2862 void mallocThreadShutdownNotification(void* arg)
2863 {
2864 // The routine is called for each pool (as TLS dtor) on each thread, except for the main thread
2865 if (!isMallocInitialized()) return;
2866 doThreadShutdownNotification((TLSData*)arg, false);
2867 }
2868 #else
__TBB_mallocThreadShutdownNotification()2869 extern "C" void __TBB_mallocThreadShutdownNotification()
2870 {
2871 // The routine is called once per thread on Windows
2872 if (!isMallocInitialized()) return;
2873 doThreadShutdownNotification(NULL, false);
2874 }
2875 #endif
2876
__TBB_mallocProcessShutdownNotification(bool windows_process_dying)2877 extern "C" void __TBB_mallocProcessShutdownNotification(bool windows_process_dying)
2878 {
2879 if (!isMallocInitialized()) return;
2880
2881 // Don't clean allocator internals if the entire process is exiting
2882 if (!windows_process_dying) {
2883 doThreadShutdownNotification(NULL, /*main_thread=*/true);
2884 }
2885 #if __TBB_MALLOC_LOCACHE_STAT
2886 printf("cache hit ratio %f, size hit %f\n",
2887 1.*cacheHits/mallocCalls, 1.*memHitKB/memAllocKB);
2888 defaultMemPool->extMemPool.loc.reportStat(stdout);
2889 #endif
2890
2891 shutdownSync.processExit();
2892 #if __TBB_SOURCE_DIRECTLY_INCLUDED
2893     /* Pthread keys must be deleted as soon as possible so that the key dtor is not called
2894        on thread termination when the tbbmalloc code may already be unloaded.
2895 */
2896 defaultMemPool->destroy();
2897 destroyBackRefMaster(&defaultMemPool->extMemPool.backend);
2898 ThreadId::destroy(); // Delete key for thread id
2899 hugePages.reset();
2900 // new total malloc initialization is possible after this point
2901 FencedStore(mallocInitialized, 0);
2902 #elif __TBB_USE_DLOPEN_REENTRANCY_WORKAROUND
2903 /* In most cases we prevent unloading tbbmalloc, and don't clean up memory
2904        on process shutdown. When unloading cannot be prevented, a library unload results
2905        in a shutdown notification, and it makes sense to release unused memory
2906 at that point (we can't release all memory because it's possible that
2907 it will be accessed after this point).
2908 TODO: better support systems where we can't prevent unloading by removing
2909 pthread destructors and releasing caches.
2910 */
2911 defaultMemPool->extMemPool.hardCachesCleanup();
2912 #endif // __TBB_SOURCE_DIRECTLY_INCLUDED
2913
2914 #if COLLECT_STATISTICS
2915 unsigned nThreads = ThreadId::getMaxThreadId();
2916 for( int i=1; i<=nThreads && i<MAX_THREADS; ++i )
2917 STAT_print(i);
2918 #endif
2919 if (!usedBySrcIncluded)
2920 MALLOC_ITT_FINI_ITTLIB();
2921 }
2922
scalable_malloc(size_t size)2923 extern "C" void * scalable_malloc(size_t size)
2924 {
2925 void *ptr = internalMalloc(size);
2926 if (!ptr) errno = ENOMEM;
2927 return ptr;
2928 }
2929
scalable_free(void * object)2930 extern "C" void scalable_free(void *object)
2931 {
2932 internalFree(object);
2933 }
2934
2935 #if MALLOC_ZONE_OVERLOAD_ENABLED
__TBB_malloc_free_definite_size(void * object,size_t size)2936 extern "C" void __TBB_malloc_free_definite_size(void *object, size_t size)
2937 {
2938 internalPoolFree(defaultMemPool, object, size);
2939 }
2940 #endif
2941
2942 /*
2943 * A variant that provides additional memory safety, by checking whether the given address
2944 * was obtained with this allocator, and if not redirecting to the provided alternative call.
2945 */
__TBB_malloc_safer_free(void * object,void (* original_free)(void *))2946 extern "C" void __TBB_malloc_safer_free(void *object, void (*original_free)(void*))
2947 {
2948 if (!object)
2949 return;
2950
2951     // tbbmalloc can have allocated the object only if tbbmalloc has been initialized
2952 if (FencedLoad(mallocInitialized) && defaultMemPool->extMemPool.backend.ptrCanBeValid(object)) {
2953 if (isLargeObject<unknownMem>(object)) {
2954             // must check for a large object first, because the small-object check touches 4 pages on the left,
2955             // and those can be inaccessible
2956 TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
2957
2958 defaultMemPool->putToLLOCache(tls, object);
2959 return;
2960 } else if (isSmallObject(object)) {
2961 freeSmallObject(object);
2962 return;
2963 }
2964 }
2965 if (original_free)
2966 original_free(object);
2967 }
2968
2969 /********* End the free code *************/
2970
2971 /********* Code for scalable_realloc ***********/
2972
2973 /*
2974 * From K&R
2975 * "realloc changes the size of the object pointed to by p to size. The contents will
2976 * be unchanged up to the minimum of the old and the new sizes. If the new size is larger,
2977 * the new space is uninitialized. realloc returns a pointer to the new space, or
2978 * NULL if the request cannot be satisfied, in which case *p is unchanged."
2979 *
2980 */
scalable_realloc(void * ptr,size_t size)2981 extern "C" void* scalable_realloc(void* ptr, size_t size)
2982 {
2983 void *tmp;
2984
2985 if (!ptr)
2986 tmp = internalMalloc(size);
2987 else if (!size) {
2988 internalFree(ptr);
2989 return NULL;
2990 } else
2991 tmp = reallocAligned(defaultMemPool, ptr, size, 0);
2992
2993 if (!tmp) errno = ENOMEM;
2994 return tmp;
2995 }
2996
2997 /*
2998 * A variant that provides additional memory safety, by checking whether the given address
2999 * was obtained with this allocator, and if not redirecting to the provided alternative call.
3000 */
__TBB_malloc_safer_realloc(void * ptr,size_t sz,void * original_realloc)3001 extern "C" void* __TBB_malloc_safer_realloc(void* ptr, size_t sz, void* original_realloc)
3002 {
3003 void *tmp; // TODO: fix warnings about uninitialized use of tmp
3004
3005 if (!ptr) {
3006 tmp = internalMalloc(sz);
3007 } else if (FencedLoad(mallocInitialized) && isRecognized(ptr)) {
3008 if (!sz) {
3009 internalFree(ptr);
3010 return NULL;
3011 } else {
3012 tmp = reallocAligned(defaultMemPool, ptr, sz, 0);
3013 }
3014 }
3015 #if USE_WINTHREAD
3016 else if (original_realloc && sz) {
3017 orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc);
3018 if ( original_ptrs->msize ){
3019 size_t oldSize = original_ptrs->msize(ptr);
3020 tmp = internalMalloc(sz);
3021 if (tmp) {
3022 memcpy(tmp, ptr, sz<oldSize? sz : oldSize);
3023 if ( original_ptrs->free ){
3024 original_ptrs->free( ptr );
3025 }
3026 }
3027 } else
3028 tmp = NULL;
3029 }
3030 #else
3031 else if (original_realloc) {
3032 typedef void* (*realloc_ptr_t)(void*,size_t);
3033 realloc_ptr_t original_realloc_ptr;
3034 (void *&)original_realloc_ptr = original_realloc;
3035 tmp = original_realloc_ptr(ptr,sz);
3036 }
3037 #endif
3038 else tmp = NULL;
3039
3040 if (!tmp) errno = ENOMEM;
3041 return tmp;
3042 }
3043
3044 /********* End code for scalable_realloc ***********/
3045
3046 /********* Code for scalable_calloc ***********/
3047
3048 /*
3049 * From K&R
3050 * calloc returns a pointer to space for an array of nobj objects,
3051 * each of size size, or NULL if the request cannot be satisfied.
3052 * The space is initialized to zero bytes.
3053 *
3054 */
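
/*
 * Overflow-check example (64-bit sketch): mult_not_overflow == 2^32. For
 * nobj == 2^33 and size == 2^32, the true product 2^65 wraps to 0, the heuristic
 * check fires because both operands are >= 2^32, and the exact check sees
 * arraySize / nobj == 0 != size, so NULL is returned with errno set to ENOMEM
 * instead of handing back a too-small allocation.
 */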
3055
scalable_calloc(size_t nobj,size_t size)3056 extern "C" void * scalable_calloc(size_t nobj, size_t size)
3057 {
3058     // it is the square root of the maximal size_t value plus one
3059 const size_t mult_not_overflow = size_t(1) << (sizeof(size_t)*CHAR_BIT/2);
3060 const size_t arraySize = nobj * size;
3061
3062 // check for overflow during multiplication:
3063 if (nobj>=mult_not_overflow || size>=mult_not_overflow) // 1) heuristic check
3064 if (nobj && arraySize / nobj != size) { // 2) exact check
3065 errno = ENOMEM;
3066 return NULL;
3067 }
3068 void* result = internalMalloc(arraySize);
3069 if (result)
3070 memset(result, 0, arraySize);
3071 else
3072 errno = ENOMEM;
3073 return result;
3074 }
3075
3076 /********* End code for scalable_calloc ***********/
3077
3078 /********* Code for aligned allocation API **********/
3079
scalable_posix_memalign(void ** memptr,size_t alignment,size_t size)3080 extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size)
3081 {
3082 if ( !isPowerOfTwoAtLeast(alignment, sizeof(void*)) )
3083 return EINVAL;
3084 void *result = allocateAligned(defaultMemPool, size, alignment);
3085 if (!result)
3086 return ENOMEM;
3087 *memptr = result;
3088 return 0;
3089 }
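
/*
 * Usage sketch (standard posix_memalign contract): the alignment must be a power of two
 * and at least sizeof(void*), e.g.
 *
 *     void *p = NULL;
 *     if (scalable_posix_memalign(&p, 64, 1000) == 0) {
 *         // p is 64-byte aligned; release it with scalable_free()
 *         scalable_free(p);
 *     }
 */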

extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment)
{
    if (!isPowerOfTwo(alignment) || 0==size) {
        errno = EINVAL;
        return NULL;
    }
    void *tmp = allocateAligned(defaultMemPool, size, alignment);
    if (!tmp) errno = ENOMEM;
    return tmp;
}

extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment)
{
    if (!isPowerOfTwo(alignment)) {
        errno = EINVAL;
        return NULL;
    }
    void *tmp;

    if (!ptr)
        tmp = allocateAligned(defaultMemPool, size, alignment);
    else if (!size) {
        scalable_free(ptr);
        return NULL;
    } else
        tmp = reallocAligned(defaultMemPool, ptr, size, alignment);

    if (!tmp) errno = ENOMEM;
    return tmp;
}
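/* Usage sketch (illustrative): the _aligned_malloc-style trio. Note the asymmetry with
   scalable_posix_memalign: here a zero size is invalid for scalable_aligned_malloc, while
   scalable_aligned_realloc(ptr, 0, align) acts as a free.

   double *v = (double*)scalable_aligned_malloc(256 * sizeof(double), 32);
   if (v) {
       double *w = (double*)scalable_aligned_realloc(v, 512 * sizeof(double), 32); // grow, keep 32-byte alignment
       if (w) v = w;  // as with realloc, the old block is expected to stay valid on failure (errno == ENOMEM)
       scalable_aligned_free(v);
   }
*/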

extern "C" void * __TBB_malloc_safer_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function)
{
    /* corner cases left out of reallocAligned to not deal with errno there */
    if (!isPowerOfTwo(alignment)) {
        errno = EINVAL;
        return NULL;
    }
    void *tmp = NULL;

    if (!ptr) {
        tmp = allocateAligned(defaultMemPool, size, alignment);
    } else if (FencedLoad(mallocInitialized) && isRecognized(ptr)) {
        if (!size) {
            internalFree(ptr);
            return NULL;
        } else {
            tmp = reallocAligned(defaultMemPool, ptr, size, alignment);
        }
    }
#if USE_WINTHREAD
    else {
        orig_aligned_ptrs *original_ptrs = static_cast<orig_aligned_ptrs*>(orig_function);
        if (size) {
            // Without the original aligned_msize, we can't do anything with this block;
            // just keep the old pointer.
            if ( original_ptrs->aligned_msize ){
                // set alignment and offset to have possibly correct oldSize
                size_t oldSize = original_ptrs->aligned_msize(ptr, sizeof(void*), 0);
                tmp = allocateAligned(defaultMemPool, size, alignment);
                if (tmp) {
                    memcpy(tmp, ptr, size<oldSize? size : oldSize);
                    if ( original_ptrs->aligned_free ){
                        original_ptrs->aligned_free( ptr );
                    }
                }
            }
        } else {
            if ( original_ptrs->aligned_free ){
                original_ptrs->aligned_free( ptr );
            }
            return NULL;
        }
    }
#else
    // As the original realloc cannot align the result, and there is no way to find
    // the size of the object being reallocated, we give up and return NULL.
    suppress_unused_warning(orig_function);
#endif
    if (!tmp) errno = ENOMEM;
    return tmp;
}

extern "C" void scalable_aligned_free(void *ptr)
{
    internalFree(ptr);
}

/********* end code for aligned allocation API **********/

/********* Code for scalable_msize ***********/

/*
 * Returns the size of a memory block allocated in the heap.
 */
extern "C" size_t scalable_msize(void* ptr)
{
    if (ptr) {
        MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected.");
        return internalMsize(ptr);
    }
    errno = EINVAL;
    // Unlike _msize, return 0 in case of parameter error.
    // Returning size_t(-1) would be more likely to lead to trouble.
    return 0;
}
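/* Usage sketch (illustrative): scalable_msize reports the usable size of a block allocated
   by this allocator; the value may exceed the requested size because of size-class rounding.

   void *p = scalable_malloc(100);
   if (p) {
       size_t usable = scalable_msize(p);  // >= 100
       scalable_free(p);
   }
   size_t zero = scalable_msize(NULL);     // 0, errno set to EINVAL
*/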

/*
 * A variant that provides additional memory safety, by checking whether the given address
 * was obtained with this allocator, and if not redirecting to the provided alternative call.
 */
extern "C" size_t __TBB_malloc_safer_msize(void *object, size_t (*original_msize)(void*))
{
    if (object) {
        // Check if the memory was allocated by scalable_malloc
        if (FencedLoad(mallocInitialized) && isRecognized(object))
            return internalMsize(object);
        else if (original_msize)
            return original_msize(object);
    }
    // object is NULL or unknown, or foreign and no original_msize
#if USE_WINTHREAD
    errno = EINVAL; // errno is expected to be set only on this platform
#endif
    return 0;
}

/*
 * The same as above, but for the _aligned_msize case.
 */
extern "C" size_t __TBB_malloc_safer_aligned_msize(void *object, size_t alignment, size_t offset, size_t (*orig_aligned_msize)(void*,size_t,size_t))
{
    if (object) {
        // Check if the memory was allocated by scalable_malloc
        if (FencedLoad(mallocInitialized) && isRecognized(object))
            return internalMsize(object);
        else if (orig_aligned_msize)
            return orig_aligned_msize(object,alignment,offset);
    }
    // object is NULL or unknown
    errno = EINVAL;
    return 0;
}
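/* Illustrative sketch (not part of the library): how an interception layer might wire the
   CRT's _msize to the safer variant on Windows. `orig_msize_from_crt` is a hypothetical
   pointer to the original _msize, captured before patching.

   extern "C" size_t _msize(void *ptr)
   {
       // Blocks owned by tbbmalloc are measured internally; anything else falls back
       // to the original CRT routine so foreign allocations keep working.
       return __TBB_malloc_safer_msize(ptr, orig_msize_from_crt);
   }
*/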

/********* End code for scalable_msize ***********/

extern "C" int scalable_allocation_mode(int param, intptr_t value)
{
    if (param == TBBMALLOC_SET_SOFT_HEAP_LIMIT) {
        defaultMemPool->extMemPool.backend.setRecommendedMaxSize((size_t)value);
        return TBBMALLOC_OK;
    } else if (param == USE_HUGE_PAGES) {
#if __linux__
        switch (value) {
        case 0:
        case 1:
            hugePages.setMode(value);
            return TBBMALLOC_OK;
        default:
            return TBBMALLOC_INVALID_PARAM;
        }
#else
        return TBBMALLOC_NO_EFFECT;
#endif
#if __TBB_SOURCE_DIRECTLY_INCLUDED
    } else if (param == TBBMALLOC_INTERNAL_SOURCE_INCLUDED) {
        switch (value) {
        case 0: // used by dynamic library
        case 1: // used by static library or directly included sources
            usedBySrcIncluded = value;
            return TBBMALLOC_OK;
        default:
            return TBBMALLOC_INVALID_PARAM;
        }
#endif
    } else if (param == TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) {
        defaultMemPool->extMemPool.loc.setHugeSizeThreshold((size_t)value);
        return TBBMALLOC_OK;
    }
    return TBBMALLOC_INVALID_PARAM;
}
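/* Usage sketch (illustrative): tuning the allocator through scalable_allocation_mode.
   Return codes come from the TBBMALLOC_* set used above.

   scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 512*1024*1024); // soft cap of 512 MB
   int rc = scalable_allocation_mode(USE_HUGE_PAGES, 1);  // TBBMALLOC_OK on Linux,
                                                          // TBBMALLOC_NO_EFFECT elsewhere
*/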

extern "C" int scalable_allocation_command(int cmd, void *param)
{
    if (param)
        return TBBMALLOC_INVALID_PARAM;

    bool released = false;
    switch(cmd) {
    case TBBMALLOC_CLEAN_THREAD_BUFFERS:
        if (TLSData *tls = defaultMemPool->getTLS(/*create=*/false))
            released = tls->externalCleanup(/*cleanOnlyUnused*/false, /*cleanBins=*/true);
        break;
    case TBBMALLOC_CLEAN_ALL_BUFFERS:
        released = defaultMemPool->extMemPool.hardCachesCleanup();
        break;
    default:
        return TBBMALLOC_INVALID_PARAM;
    }
    return released ? TBBMALLOC_OK : TBBMALLOC_NO_EFFECT;
}
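/* Usage sketch (illustrative): cleaning internal caches, which may let memory be returned
   to the OS. The reserved `param` argument must be NULL.

   int rc = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
   // rc == TBBMALLOC_OK if anything was released, TBBMALLOC_NO_EFFECT otherwise
*/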