/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
#define __TBB_tbbmalloc_internal_H

#include "TypeDefinitions.h" /* Also includes customization layer Customize.h */

#if USE_PTHREAD
    // Some pthreads documentation says that <pthread.h> must be the first header.
    #include <pthread.h>
    typedef pthread_key_t tls_key_t;
#elif USE_WINTHREAD
    #include <windows.h>
    typedef DWORD tls_key_t;
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#include <atomic>

// TODO: *BSD also has it
#define BACKEND_HAS_MREMAP __linux__
#define CHECK_ALLOCATION_RANGE MALLOC_DEBUG || MALLOC_ZONE_OVERLOAD_ENABLED || MALLOC_UNIXLIKE_OVERLOAD_ENABLED

#include "oneapi/tbb/detail/_config.h" // for __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN
#include "oneapi/tbb/detail/_template_helpers.h"
#if __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN
  #define _EXCEPTION_PTR_H /* prevents exception_ptr.h inclusion */
  #define _GLIBCXX_NESTED_EXCEPTION_H /* prevents nested_exception.h inclusion */
#endif

#include <stdio.h>
#include <stdlib.h>
#include <limits.h> // for CHAR_BIT
#include <string.h> // for memset
#if MALLOC_CHECK_RECURSION
#include <new>        /* for placement new */
#endif
#include "oneapi/tbb/scalable_allocator.h"
#include "tbbmalloc_internal_api.h"

/********* Various compile-time options        **************/

#if !__TBB_DEFINE_MIC && __TBB_MIC_NATIVE
 #error Intel(R) Many Integrated Core Compiler does not define __MIC__ anymore.
#endif

#define MALLOC_TRACE 0

#if MALLOC_TRACE
#define TRACEF(x) printf x
#else
#define TRACEF(x) ((void)0)
#endif /* MALLOC_TRACE */

#define ASSERT_TEXT NULL

#define COLLECT_STATISTICS ( MALLOC_DEBUG && MALLOCENV_COLLECT_STATISTICS )
#ifndef USE_INTERNAL_TID
#define USE_INTERNAL_TID COLLECT_STATISTICS || MALLOC_TRACE
#endif

#include "Statistics.h"

// call yield for whitebox testing, skip in the real library
#ifndef WhiteboxTestingYield
#define WhiteboxTestingYield() ((void)0)
#endif


/********* End compile-time options        **************/

namespace rml {

namespace internal {

#if __TBB_MALLOC_LOCACHE_STAT
extern intptr_t mallocCalls, cacheHits;
extern intptr_t memAllocKB, memHitKB;
#endif

//! Utility template function to prevent "unused" warnings by various compilers.
template<typename T>
void suppress_unused_warning( const T& ) {}

/********** Various global default constants ********/

/*
 * Default huge page size
 */
static const size_t HUGE_PAGE_SIZE = 2 * 1024 * 1024;

/********** End of global default constants *********/

/********** Various numeric parameters controlling allocations ********/

/*
 * slabSize - the size of a block for allocation of small objects;
 * it must be larger than maxSegregatedObjectSize.
 */
const uintptr_t slabSize = 16*1024;

/*
 * Large blocks cache cleanup frequency.
 * It should be a power of 2 for fast checking.
 */
const unsigned cacheCleanupFreq = 256;

/*
 * Alignment of large (>= minLargeObjectSize) objects.
 */
const size_t largeObjectAlignment = estimatedCacheLineSize;

/*
 * The number of bins in the TLS that lead to blocks we can allocate in.
 */
const uint32_t numBlockBinLimit = 31;

/********** End of numeric parameters controlling allocations *********/

class BlockI;
class Block;
struct LargeMemoryBlock;
struct ExtMemoryPool;
struct MemRegion;
class FreeBlock;
class TLSData;
class Backend;
class MemoryPool;
struct CacheBinOperation;
extern const uint32_t minLargeObjectSize;

enum DecreaseOrIncrease {
    decrease, increase
};

class TLSKey {
    tls_key_t TLS_pointer_key;
public:
    bool init();
    bool destroy();
    TLSData* getThreadMallocTLS() const;
    void setThreadMallocTLS( TLSData * newvalue );
    TLSData* createTLS(MemoryPool *memPool, Backend *backend);
};
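
// Usage sketch (illustrative; an assumption about the wrapped primitives rather
// than the actual implementation, which lives in the .cpp files). On the
// USE_PTHREAD side TLSKey maps onto the standard pthread TLS calls; the
// USE_WINTHREAD side uses TlsAlloc/TlsGetValue/TlsSetValue/TlsFree analogously.
// `tlsData` below is a hypothetical TLSData*.
//
//     pthread_key_t key;
//     pthread_key_create(&key, nullptr);                    // ~ TLSKey::init()
//     pthread_setspecific(key, (void*)tlsData);             // ~ setThreadMallocTLS(tlsData)
//     TLSData *tls = (TLSData*)pthread_getspecific(key);    // ~ getThreadMallocTLS()
//     pthread_key_delete(key);                              // ~ TLSKey::destroy()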

template<typename Arg, typename Compare>
inline void AtomicUpdate(std::atomic<Arg>& location, Arg newVal, const Compare &cmp)
{
    static_assert(sizeof(Arg) == sizeof(intptr_t), "Type of argument must match AtomicCompareExchange type.");
    Arg old = location.load(std::memory_order_acquire);
    for (; cmp(old, newVal); ) {
        if (location.compare_exchange_strong(old, newVal))
            break;
        // TODO: do we need backoff after unsuccessful CAS?
        //old = val;
    }
}
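
// Usage sketch (illustrative only, not used by the library at this point): with
// a "less-than" comparator, AtomicUpdate keeps the location at the maximum of
// all submitted values; `maxSeen` and `candidate` are hypothetical names.
//
//     std::atomic<intptr_t> maxSeen{0};
//     auto less = [](intptr_t old, intptr_t newVal) { return old < newVal; };
//     AtomicUpdate(maxSeen, candidate, less);  // stores candidate only while cmp(old, newVal) holds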

// TODO: make BitMaskBasic more general
// TODO: check that BitMaskBasic is not used for synchronization
// (currently, it fits BitMaskMin well, but is not as suitable for BitMaskMax)
template<unsigned NUM>
class BitMaskBasic {
    static const unsigned SZ = (NUM-1)/(CHAR_BIT*sizeof(uintptr_t))+1;
    static const unsigned WORD_LEN = CHAR_BIT*sizeof(uintptr_t);

    std::atomic<uintptr_t> mask[SZ];

protected:
    void set(size_t idx, bool val) {
        MALLOC_ASSERT(idx<NUM, ASSERT_TEXT);

        size_t i = idx / WORD_LEN;
        int pos = WORD_LEN - idx % WORD_LEN - 1;
        if (val) {
            mask[i].fetch_or(1ULL << pos);
        } else {
            mask[i].fetch_and(~(1ULL << pos));
        }
    }
    int getMinTrue(unsigned startIdx) const {
        unsigned idx = startIdx / WORD_LEN;
        int pos;

        if (startIdx % WORD_LEN) {
            // only interested in part of a word, clear bits before startIdx
            pos = WORD_LEN - startIdx % WORD_LEN;
            uintptr_t actualMask = mask[idx].load(std::memory_order_relaxed) & (((uintptr_t)1<<pos) - 1);
            idx++;
            if (-1 != (pos = BitScanRev(actualMask)))
                return idx*WORD_LEN - pos - 1;
        }

        while (idx<SZ)
            if (-1 != (pos = BitScanRev(mask[idx++].load(std::memory_order_relaxed))))
                return idx*WORD_LEN - pos - 1;
        return -1;
    }
public:
    void reset() { for (unsigned i=0; i<SZ; i++) mask[i].store(0, std::memory_order_relaxed); }
};

template<unsigned NUM>
class BitMaskMin : public BitMaskBasic<NUM> {
public:
    void set(size_t idx, bool val) { BitMaskBasic<NUM>::set(idx, val); }
    int getMinTrue(unsigned startIdx) const {
        return BitMaskBasic<NUM>::getMinTrue(startIdx);
    }
};

template<unsigned NUM>
class BitMaskMax : public BitMaskBasic<NUM> {
public:
    void set(size_t idx, bool val) {
        BitMaskBasic<NUM>::set(NUM - 1 - idx, val);
    }
    int getMaxTrue(unsigned startIdx) const {
        int p = BitMaskBasic<NUM>::getMinTrue(NUM-startIdx-1);
        return -1==p? -1 : (int)NUM - 1 - p;
    }
};
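
// Usage sketch (illustrative): the masks answer "which set index is closest to
// a given starting position?" queries, e.g. when searching for a non-empty bin;
// the 32-bit size and the indices below are arbitrary examples.
//
//     BitMaskMin<32> lowBins;
//     lowBins.reset();
//     lowBins.set(5, true);
//     lowBins.set(9, true);
//     int firstSet = lowBins.getMinTrue(6);   // -> 9, the lowest set index >= 6
//
//     BitMaskMax<32> highBins;
//     highBins.reset();
//     highBins.set(5, true);
//     int lastSet = highBins.getMaxTrue(7);   // -> 5, the highest set index <= 7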


// The part of thread-specific data that can be modified by other threads.
// Such modifications must be protected by AllLocalCaches::listLock.
struct TLSRemote {
    TLSRemote *next,
              *prev;
};

// The list of all thread-local data; supports cleanup of thread caches
class AllLocalCaches {
    TLSRemote  *head;
    MallocMutex listLock; // protects operations on the list
public:
    void registerThread(TLSRemote *tls);
    void unregisterThread(TLSRemote *tls);
    bool cleanup(bool cleanOnlyUnused);
    void markUnused();
    void reset() { head = NULL; }
};

class LifoList {
public:
    inline LifoList();
    inline void push(Block *block);
    inline Block *pop();
    inline Block *grab();

private:
    std::atomic<Block*> top;
    MallocMutex lock;
};

/*
 * When a block that is not completely free is returned for reuse by other threads,
 * this is where the block goes.
 *
 * LifoList assumes zero initialization, so its constructors are omitted below
 * to avoid linking with the C++ runtime libraries on Linux.
 */

class OrphanedBlocks {
    LifoList bins[numBlockBinLimit];
public:
    Block *get(TLSData *tls, unsigned int size);
    void put(intptr_t binTag, Block *block);
    void reset();
    bool cleanup(Backend* backend);
};

/* Large objects entities */
#include "large_objects.h"

// Select index size for BackRefMaster based on word size: default is uint32_t,
// uint16_t for 32-bit platforms.
template<bool>
struct MasterIndexSelect {
    typedef uint32_t master_type;
};

template<>
struct MasterIndexSelect<false> {
    typedef uint16_t master_type;
};

class BackRefIdx { // composite index to backreference array
public:
    typedef MasterIndexSelect<4 < sizeof(uintptr_t)>::master_type master_t;
private:
    static const master_t invalid = ~master_t(0);
    master_t master;      // index in BackRefMaster
    uint16_t largeObj:1;  // is this object "large"?
    uint16_t offset  :15; // offset from beginning of BackRefBlock
public:
    BackRefIdx() : master(invalid), largeObj(0), offset(0) {}
    bool isInvalid() const { return master == invalid; }
    bool isLargeObject() const { return largeObj; }
    master_t getMaster() const { return master; }
    uint16_t getOffset() const { return offset; }

#if __TBB_USE_THREAD_SANITIZER
    friend
    __attribute__((no_sanitize("thread")))
     BackRefIdx dereference(const BackRefIdx* ptr) {
        BackRefIdx idx;
        idx.master = ptr->master;
        idx.largeObj = ptr->largeObj;
        idx.offset = ptr->offset;
        return idx;
    }
#else
    friend
    BackRefIdx dereference(const BackRefIdx* ptr) {
        return *ptr;
    }
#endif

    // only newBackRef can modify BackRefIdx
    static BackRefIdx newBackRef(bool largeObj);
};
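
// Usage sketch (illustrative; `objectPtr` is a hypothetical pointer being
// registered): a BackRefIdx packs (master, offset, largeObj) into a few bytes;
// clients obtain one via newBackRef() and resolve it through the
// set/get/removeBackRef functions declared at the end of this header.
//
//     BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
//     if (!idx.isInvalid()) {
//         setBackRef(idx, objectPtr);                // associate the index with an object
//         void *p = getBackRef(dereference(&idx));   // resolves back to objectPtr
//         removeBackRef(idx);                        // release the slot
//     }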

// Block header is used during block coalescing
// and must be preserved in used blocks.
class BlockI {
#if __clang__
    #pragma clang diagnostic push
    #pragma clang diagnostic ignored "-Wunused-private-field"
#endif
    intptr_t     blockState[2];
#if __clang__
    #pragma clang diagnostic pop // "-Wunused-private-field"
#endif
};

struct LargeMemoryBlock : public BlockI {
    MemoryPool       *pool;          // owner pool
    LargeMemoryBlock *next,          // ptrs in list of cached blocks
                     *prev,
    // Doubly-linked list of the pool's large objects.
    // Used to destroy backrefs on pool destruction (backrefs are global)
    // and for object releasing during pool reset.
                     *gPrev,
                     *gNext;
    uintptr_t         age;           // age of block while in cache
    size_t            objectSize;    // the size requested by a client
    size_t            unalignedSize; // the size requested from backend
    BackRefIdx        backRefIdx;    // cached here, used copy is in LargeObjectHdr
};

// Classes and methods for backend.cpp
#include "backend.h"

// A TBB allocator mode that can be controlled by the user
// via API/environment variable. Must be placed in zero-initialized memory.
// External synchronization assumed.
// TODO: TBB_VERSION support
class AllocControlledMode {
    intptr_t val;
    bool     setDone;

public:
    intptr_t get() const {
        MALLOC_ASSERT(setDone, ASSERT_TEXT);
        return val;
    }

    // Note: set() can be called before init()
    void set(intptr_t newVal) {
        val = newVal;
        setDone = true;
    }

    bool ready() const {
        return setDone;
    }

    // envName - environment variable to get the controlled mode from
    void initReadEnv(const char *envName, intptr_t defaultVal) {
        if (!setDone) {
            // unreferenced formal parameter warning
            tbb::detail::suppress_unused_warning(envName);
#if !__TBB_WIN8UI_SUPPORT
            // TODO: use strtol to get the actual value of the environment variable
            const char *envVal = getenv(envName);
            if (envVal && !strcmp(envVal, "1"))
                val = 1;
            else
#endif
                val = defaultVal;
            setDone = true;
        }
    }
};
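
// Usage sketch (illustrative; the object name is hypothetical): the mode object
// lives in zero-initialized memory, is primed once from an environment variable,
// and an explicit set() overrides whatever was read.
//
//     static AllocControlledMode hugePagesMode;                          // zero-initialized
//     hugePagesMode.initReadEnv("TBB_MALLOC_USE_HUGE_PAGES", /*defaultVal=*/0);
//     if (hugePagesMode.ready() && hugePagesMode.get()) { /* honor the mode */ }
//     hugePagesMode.set(1);                                              // programmatic override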

// Page type to be used inside MapMemory.
// Regular (4KB aligned), Huge, and Transparent Huge Pages (2MB aligned).
enum PageType {
    REGULAR = 0,
    PREALLOCATED_HUGE_PAGE,
    TRANSPARENT_HUGE_PAGE
};

// init() and printStatus() are called only under the global initialization lock.
// A race is possible between registerAllocation() and registerReleasing();
// the harm is that at most a single huge page release is missed (because failure
// to get a huge page is registered only the 1st time), which is negligible.
// setMode() can also be called concurrently.
// The object must reside in zero-initialized memory.
// TODO: can we check for huge page presence during every 10th mmap() call
// in case a huge page is released by another process?
class HugePagesStatus {
private:
    AllocControlledMode requestedMode; // changed only by user
                                       // to keep enabled and requestedMode consistent
    MallocMutex setModeLock;
    size_t      pageSize;
    std::atomic<intptr_t> needActualStatusPrint;

    static void doPrintStatus(bool state, const char *stateName) {
        // Under macOS* fprintf/snprintf acquires an internal lock, so if the
        // 1st allocation is done under that lock, we get a deadlock.
        // Do not use fprintf etc. during initialization.
        fputs("TBBmalloc: huge pages\t", stderr);
        if (!state)
            fputs("not ", stderr);
        fputs(stateName, stderr);
        fputs("\n", stderr);
    }

    void parseSystemMemInfo() {
        bool hpAvailable  = false;
        bool thpAvailable = false;
        unsigned long long hugePageSize = 0;

#if __unix__
        // Check huge pages existence
        unsigned long long meminfoHugePagesTotal = 0;

        parseFileItem meminfoItems[] = {
            // Parse system huge page size
            { "Hugepagesize: %llu kB", hugePageSize },
            // Check if there are preallocated huge pages on the system
            // https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
            { "HugePages_Total: %llu", meminfoHugePagesTotal } };

        parseFile</*BUFF_SIZE=*/100>("/proc/meminfo", meminfoItems);

        // Double-check other system information regarding preallocated
        // huge pages if there is no information in /proc/meminfo
        unsigned long long vmHugePagesTotal = 0;

        parseFileItem vmItem[] = { { "%llu", vmHugePagesTotal } };

        // We parse a counter value; it can't be huge
        parseFile</*BUFF_SIZE=*/100>("/proc/sys/vm/nr_hugepages", vmItem);

        if (meminfoHugePagesTotal > 0 || vmHugePagesTotal > 0) {
            MALLOC_ASSERT(hugePageSize != 0, "Huge page size can't be zero if preallocated huge pages were found.");

            // Any non-zero value clearly states that there are preallocated
            // huge pages on the system
            hpAvailable = true;
        }

        // Check if there is transparent huge pages support on the system
        unsigned long long thpPresent = 'n';
        parseFileItem thpItem[] = { { "[alwa%cs] madvise never\n", thpPresent } };
        parseFile</*BUFF_SIZE=*/100>("/sys/kernel/mm/transparent_hugepage/enabled", thpItem);

        if (thpPresent == 'y') {
            MALLOC_ASSERT(hugePageSize != 0, "Huge page size can't be zero if THP support was found.");
            thpAvailable = true;
        }
#endif
        MALLOC_ASSERT(!pageSize, "Huge page size can't be set twice. Double initialization.");

        // Initialize object variables
        pageSize       = hugePageSize * 1024; // was read in KB from meminfo
        isHPAvailable  = hpAvailable;
        isTHPAvailable = thpAvailable;
    }

public:

    // System information
    bool isHPAvailable;
    bool isTHPAvailable;

    // User defined value
    bool isEnabled;

    void init() {
        parseSystemMemInfo();
        MallocMutex::scoped_lock lock(setModeLock);
        requestedMode.initReadEnv("TBB_MALLOC_USE_HUGE_PAGES", 0);
        isEnabled = (isHPAvailable || isTHPAvailable) && requestedMode.get();
    }

    // Can be set from user code at any point.
    // If init() has not been called by that point, isEnabled will be false.
    void setMode(intptr_t newVal) {
        MallocMutex::scoped_lock lock(setModeLock);
        requestedMode.set(newVal);
        isEnabled = (isHPAvailable || isTHPAvailable) && newVal;
    }

    void reset() {
        needActualStatusPrint.store(0, std::memory_order_relaxed);
        pageSize = 0;
        isEnabled = isHPAvailable = isTHPAvailable = false;
    }

    // If the memory mapping size is a multiple of the huge page size, some OS kernels
    // can use huge pages transparently. Use this when huge pages are requested.
    size_t getGranularity() const {
        if (requestedMode.ready())
            return requestedMode.get() ? pageSize : 0;
        else
            return HUGE_PAGE_SIZE; // the mode is not yet known; assume typical 2MB huge pages
    }

    void printStatus() {
        doPrintStatus(requestedMode.get(), "requested");
        if (requestedMode.get()) { // report actual status iff requested
            if (pageSize)
                needActualStatusPrint.store(1, std::memory_order_release);
            else
                doPrintStatus(/*state=*/false, "available");
        }
    }
};
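
// Usage sketch (illustrative; the instance name is hypothetical, and the object
// is assumed to live in zero-initialized memory as required above):
//
//     static HugePagesStatus hugePages;
//     hugePages.init();                     // parses /proc data and TBB_MALLOC_USE_HUGE_PAGES
//     size_t mapGranularity = hugePages.getGranularity();
//     if (hugePages.isEnabled) { /* request 2MB-aligned mappings from the OS */ }
//     hugePages.setMode(0);                 // user code may disable huge pages later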

class AllLargeBlocksList {
    MallocMutex       largeObjLock;
    LargeMemoryBlock *loHead;
public:
    void add(LargeMemoryBlock *lmb);
    void remove(LargeMemoryBlock *lmb);
    template<bool poolDestroy> void releaseAll(Backend *backend);
};

struct ExtMemoryPool {
    Backend           backend;
    LargeObjectCache  loc;
    AllLocalCaches    allLocalCaches;
    OrphanedBlocks    orphanedBlocks;

    intptr_t          poolId;
    // To find all large objects. Used during user pool destruction,
    // to release all backreferences in large blocks (slab blocks do not have them).
    AllLargeBlocksList lmbList;
    // Callbacks to be used instead of MapMemory/UnmapMemory.
    rawAllocType      rawAlloc;
    rawFreeType       rawFree;
    size_t            granularity;
    bool              keepAllMemory,
                      delayRegsReleasing,
    // TODO: implement fixedPool by calling rawFree on destruction
                      fixedPool;
    TLSKey            tlsPointerKey;  // per-pool TLS key

    bool init(intptr_t poolId, rawAllocType rawAlloc, rawFreeType rawFree,
              size_t granularity, bool keepAllMemory, bool fixedPool);
    bool initTLS();

    // i.e., not the system default pool for scalable_malloc/scalable_free
    bool userPool() const { return rawAlloc; }

    // true if something has been released
    bool softCachesCleanup();
    bool releaseAllLocalCaches();
    bool hardCachesCleanup();
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);
    bool reset() {
        loc.reset();
        allLocalCaches.reset();
        orphanedBlocks.reset();
        bool ret = tlsPointerKey.destroy();
        backend.reset();
        return ret;
    }
    bool destroy() {
        MALLOC_ASSERT(isPoolValid(),
                      "Possible double pool_destroy or heap corruption");
        if (!userPool()) {
            loc.reset();
            allLocalCaches.reset();
        }
        // pthread_key_dtors must be disabled before memory unmapping
        // TODO: race-free solution
        bool ret = tlsPointerKey.destroy();
        if (rawFree || !userPool())
            ret &= backend.destroy();
        // pool is not valid after this point
        granularity = 0;
        return ret;
    }
    void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; }
    inline bool regionsAreReleaseable() const;

    LargeMemoryBlock *mallocLargeObject(MemoryPool *pool, size_t allocationSize);
    void freeLargeObject(LargeMemoryBlock *lmb);
    void freeLargeObjectList(LargeMemoryBlock *head);
#if MALLOC_DEBUG
    // use granularity as a marker for pool validity
    bool isPoolValid() const { return granularity; }
#endif
};
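
// Lifecycle sketch (an assumption about call order, not a definitive contract;
// the callback and argument names are hypothetical): a pool is configured once
// via init(), serves large objects, and is torn down with destroy(), after which
// granularity is zeroed so MALLOC_DEBUG builds can detect double destruction.
//
//     ExtMemoryPool *extPool = ...;               // placed in zero-initialized memory
//     extPool->init(poolId, rawAllocCb, rawFreeCb,
//                   /*granularity=*/4096, /*keepAllMemory=*/false, /*fixedPool=*/false);
//     LargeMemoryBlock *lmb = extPool->mallocLargeObject(memPool, allocationSize);
//     extPool->freeLargeObject(lmb);
//     extPool->destroy();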

inline bool Backend::inUserPool() const { return extMemPool->userPool(); }

struct LargeObjectHdr {
    LargeMemoryBlock *memoryBlock;
    /* Backreference points to LargeObjectHdr.
       Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */
    BackRefIdx       backRefIdx;
};

struct FreeObject {
    FreeObject  *next;
};


/******* A helper class to support overriding malloc with scalable_malloc *******/
#if MALLOC_CHECK_RECURSION

class RecursiveMallocCallProtector {
    // pointer to automatic data of the holding thread
    static std::atomic<void*> autoObjPtr;
    static MallocMutex rmc_mutex;
    static std::atomic<pthread_t> owner_thread;
/* Under FreeBSD 8.0, the 1st call to any pthread function, including pthread_self,
   leads to pthread initialization, which causes malloc calls. As the 1st usage of
   RecursiveMallocCallProtector can happen before pthread is initialized, pthread calls
   can't be used in the 1st instance of RecursiveMallocCallProtector.
   RecursiveMallocCallProtector is used the 1st time in checkInitialization(),
   so there is a guarantee that on the 2nd usage pthread is initialized.
   No such situation has been observed with other supported OSes.
 */
#if __FreeBSD__ || __DragonFly__
    static bool        canUsePthread;
#else
    static const bool  canUsePthread = true;
#endif
/*
  The variable is modified in checkInitialization,
  so it can be read without memory barriers.
 */
    static bool mallocRecursionDetected;

    MallocMutex::scoped_lock* lock_acquired;
    char scoped_lock_space[sizeof(MallocMutex::scoped_lock)+1];

public:

    RecursiveMallocCallProtector() : lock_acquired(NULL) {
        lock_acquired = new (scoped_lock_space) MallocMutex::scoped_lock( rmc_mutex );
        if (canUsePthread)
            owner_thread.store(pthread_self(), std::memory_order_relaxed);
        autoObjPtr.store(&scoped_lock_space, std::memory_order_relaxed);
    }
    ~RecursiveMallocCallProtector() {
        if (lock_acquired) {
            autoObjPtr.store(nullptr, std::memory_order_relaxed);
            lock_acquired->~scoped_lock();
        }
    }
    static bool sameThreadActive() {
        if (!autoObjPtr.load(std::memory_order_relaxed)) // fast path
            return false;
        // Some thread has an active recursive call protector; check if it is the current one.
        // Exact pthread_self based test
        if (canUsePthread) {
            if (pthread_equal( owner_thread.load(std::memory_order_relaxed), pthread_self() )) {
                mallocRecursionDetected = true;
                return true;
            } else
                return false;
        }
        // inexact stack size based test
        const uintptr_t threadStackSz = 2*1024*1024;
        int dummy;

        uintptr_t xi = (uintptr_t)autoObjPtr.load(std::memory_order_relaxed), yi = (uintptr_t)&dummy;
        uintptr_t diffPtr = xi > yi ? xi - yi : yi - xi;

        return diffPtr < threadStackSz;
    }

/* The function is called on the 1st scalable_malloc call to check whether malloc calls
   scalable_malloc (a nested call must set mallocRecursionDetected). */
    static void detectNaiveOverload() {
        if (!malloc_proxy) {
#if __FreeBSD__ || __DragonFly__
/* If !canUsePthread, we could not call pthread_self() earlier, but now pthread
   is already initialized, so we can do it. */
            if (!canUsePthread) {
                canUsePthread = true;
                owner_thread.store(pthread_self(), std::memory_order_relaxed);
            }
#endif
            free(malloc(1));
        }
    }
};

#else

class RecursiveMallocCallProtector {
public:
    RecursiveMallocCallProtector() {}
    ~RecursiveMallocCallProtector() {}
};

#endif  /* MALLOC_CHECK_RECURSION */
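
// Usage sketch (illustrative; the surrounding functions are hypothetical): the
// protector is placed on the stack around allocator code that may indirectly
// call malloc (e.g. via libc) while malloc itself is overridden; a nested call
// can then be recognized with sameThreadActive() and served without recursing.
//
//     void growInternalStructures() {
//         RecursiveMallocCallProtector scoped;   // marks this thread as "inside the allocator"
//         /* ... code that might trigger a nested malloc ... */
//     }
//     // in the malloc interposer:
//     if (RecursiveMallocCallProtector::sameThreadActive()) {
//         /* nested call detected: satisfy it from a private bootstrap buffer */
//     }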

unsigned int getThreadId();

bool initBackRefMaster(Backend *backend);
void destroyBackRefMaster(Backend *backend);
void removeBackRef(BackRefIdx backRefIdx);
void setBackRef(BackRefIdx backRefIdx, void *newPtr);
void *getBackRef(BackRefIdx backRefIdx);

} // namespace internal
} // namespace rml

#endif // __TBB_tbbmalloc_internal_H