/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
#define __TBB_tbbmalloc_internal_H

#include "TypeDefinitions.h" /* Also includes customization layer Customize.h */

#if USE_PTHREAD
    // Some pthreads documentation says that <pthread.h> must be the first header.
    #include <pthread.h>
    typedef pthread_key_t tls_key_t;
#elif USE_WINTHREAD
    #include <windows.h>
    typedef DWORD tls_key_t;
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

#include <atomic>

// TODO: *BSD also has it
#define BACKEND_HAS_MREMAP     __linux__
#define CHECK_ALLOCATION_RANGE MALLOC_DEBUG || MALLOC_ZONE_OVERLOAD_ENABLED || MALLOC_UNIXLIKE_OVERLOAD_ENABLED

#include "oneapi/tbb/detail/_config.h" // for __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN
#include "oneapi/tbb/detail/_template_helpers.h"
#if __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN
  #define _EXCEPTION_PTR_H /* prevents exception_ptr.h inclusion */
  #define _GLIBCXX_NESTED_EXCEPTION_H /* prevents nested_exception.h inclusion */
#endif

#include <stdio.h>
#include <stdlib.h>
#include <limits.h> // for CHAR_BIT
#include <string.h> // for memset
#if MALLOC_CHECK_RECURSION
#include <new>      /* for placement new */
#endif
#include "oneapi/tbb/scalable_allocator.h"
#include "tbbmalloc_internal_api.h"

/********* Various compile-time options **************/

#if !__TBB_DEFINE_MIC && __TBB_MIC_NATIVE
#error Intel(R) Many Integrated Core Compiler does not define __MIC__ anymore.
#endif

#define MALLOC_TRACE 0

#if MALLOC_TRACE
#define TRACEF(x) printf x
#else
#define TRACEF(x) ((void)0)
#endif /* MALLOC_TRACE */

#define ASSERT_TEXT NULL

#define COLLECT_STATISTICS ( MALLOC_DEBUG && MALLOCENV_COLLECT_STATISTICS )
#ifndef USE_INTERNAL_TID
#define USE_INTERNAL_TID COLLECT_STATISTICS || MALLOC_TRACE
#endif

#include "Statistics.h"

// call yield for whitebox testing, skip in real library
#ifndef WhiteboxTestingYield
#define WhiteboxTestingYield() ((void)0)
#endif


/********* End compile-time options **************/

namespace rml {

namespace internal {

#if __TBB_MALLOC_LOCACHE_STAT
extern intptr_t mallocCalls, cacheHits;
extern intptr_t memAllocKB, memHitKB;
#endif

//! Utility template function to prevent "unused" warnings by various compilers.
template<typename T>
void suppress_unused_warning( const T& ) {}

/********** Various global default constants ********/

/*
 * Default huge page size
 */
static const size_t HUGE_PAGE_SIZE = 2 * 1024 * 1024;

/********** End of global default constants *********/

/********** Various numeric parameters controlling allocations ********/

/*
 * slabSize - the size of a block for allocation of small objects,
 * it must be larger than maxSegregatedObjectSize.
 */
const uintptr_t slabSize = 16*1024;

/*
 * Large blocks cache cleanup frequency.
 * It should be a power of 2 for fast checking.
 */
const unsigned cacheCleanupFreq = 256;

/*
 * Alignment of large (>= minLargeObjectSize) objects.
 */
const size_t largeObjectAlignment = estimatedCacheLineSize;

/*
 * The number of bins in the TLS that lead to blocks we can allocate in.
 */
const uint32_t numBlockBinLimit = 31;

/********** End of numeric parameters controlling allocations *********/

class BlockI;
class Block;
struct LargeMemoryBlock;
struct ExtMemoryPool;
struct MemRegion;
class FreeBlock;
class TLSData;
class Backend;
class MemoryPool;
struct CacheBinOperation;
extern const uint32_t minLargeObjectSize;

enum DecreaseOrIncrease {
    decrease, increase
};

class TLSKey {
    tls_key_t TLS_pointer_key;
public:
    bool init();
    bool destroy();
    TLSData* getThreadMallocTLS() const;
    void setThreadMallocTLS( TLSData * newvalue );
    TLSData* createTLS(MemoryPool *memPool, Backend *backend);
};

template<typename Arg, typename Compare>
inline void AtomicUpdate(std::atomic<Arg>& location, Arg newVal, const Compare &cmp)
{
    static_assert(sizeof(Arg) == sizeof(intptr_t), "Type of argument must match AtomicCompareExchange type.");
    Arg old = location.load(std::memory_order_acquire);
    for (; cmp(old, newVal); ) {
        if (location.compare_exchange_strong(old, newVal))
            break;
        // TODO: do we need backoff after unsuccessful CAS?
        //old = val;
    }
}
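
// Illustrative usage sketch (editorial, not part of the allocator): AtomicUpdate
// retries the CAS only while cmp(currentValue, newVal) still holds, so it can
// maintain, e.g., a lock-free running maximum. The counter below is hypothetical.
//
//     std::atomic<intptr_t> peakAllocatedKB;
//     void notePeak(intptr_t currentKB) {
//         AtomicUpdate(peakAllocatedKB, currentKB,
//                      [](intptr_t oldVal, intptr_t candidate) { return oldVal < candidate; });
//     }
//
// compare_exchange_strong reloads `old` on failure, so if another thread installs
// a larger value the loop condition becomes false and the update stops without
// overwriting it.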

// TODO: make BitMaskBasic more general
// TODO: check that BitMaskBasic is not used for synchronization
// (currently, it fits BitMaskMin well, but not as suitable for BitMaskMax)
template<unsigned NUM>
class BitMaskBasic {
    static const unsigned SZ = (NUM-1)/(CHAR_BIT*sizeof(uintptr_t))+1;
    static const unsigned WORD_LEN = CHAR_BIT*sizeof(uintptr_t);

    std::atomic<uintptr_t> mask[SZ];

protected:
    void set(size_t idx, bool val) {
        MALLOC_ASSERT(idx<NUM, ASSERT_TEXT);

        size_t i = idx / WORD_LEN;
        int pos = WORD_LEN - idx % WORD_LEN - 1;
        if (val) {
            mask[i].fetch_or(1ULL << pos);
        } else {
            mask[i].fetch_and(~(1ULL << pos));
        }
    }
    int getMinTrue(unsigned startIdx) const {
        unsigned idx = startIdx / WORD_LEN;
        int pos;

        if (startIdx % WORD_LEN) {
            // only interested in part of a word, clear bits before startIdx
            pos = WORD_LEN - startIdx % WORD_LEN;
            uintptr_t actualMask = mask[idx].load(std::memory_order_relaxed) & (((uintptr_t)1<<pos) - 1);
            idx++;
            if (-1 != (pos = BitScanRev(actualMask)))
                return idx*WORD_LEN - pos - 1;
        }

        while (idx<SZ)
            if (-1 != (pos = BitScanRev(mask[idx++].load(std::memory_order_relaxed))))
                return idx*WORD_LEN - pos - 1;
        return -1;
    }
public:
    void reset() { for (unsigned i=0; i<SZ; i++) mask[i].store(0, std::memory_order_relaxed); }
};

template<unsigned NUM>
class BitMaskMin : public BitMaskBasic<NUM> {
public:
    void set(size_t idx, bool val) { BitMaskBasic<NUM>::set(idx, val); }
    int getMinTrue(unsigned startIdx) const {
        return BitMaskBasic<NUM>::getMinTrue(startIdx);
    }
};

template<unsigned NUM>
class BitMaskMax : public BitMaskBasic<NUM> {
public:
    void set(size_t idx, bool val) {
        BitMaskBasic<NUM>::set(NUM - 1 - idx, val);
    }
    int getMaxTrue(unsigned startIdx) const {
        int p = BitMaskBasic<NUM>::getMinTrue(NUM-startIdx-1);
        return -1==p? -1 : (int)NUM - 1 - p;
    }
};
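
// Editorial sketch of the index mapping (not part of the allocator): BitMaskMax
// stores bit idx at basic position NUM-1-idx, which lets getMaxTrue() reuse
// BitMaskBasic::getMinTrue(). For example, with NUM == 32:
//
//     BitMaskMax<32> m;            // assumes zero-initialized storage (or m.reset())
//     m.set(5, true);              // stored internally at basic index 26
//     m.set(20, true);             // stored internally at basic index 11
//     int top = m.getMaxTrue(31);  // getMinTrue(0) finds basic index 11 -> returns 20
//
// In general getMaxTrue(startIdx) reports the largest set index that does not
// exceed startIdx, or -1 if there is none.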


// The part of thread-specific data that can be modified by other threads.
// Such modifications must be protected by AllLocalCaches::listLock.
struct TLSRemote {
    TLSRemote *next,
              *prev;
};

// The list of all thread-local data; supporting cleanup of thread caches
class AllLocalCaches {
    TLSRemote  *head;
    MallocMutex listLock; // protects operations in the list
public:
    void registerThread(TLSRemote *tls);
    void unregisterThread(TLSRemote *tls);
    bool cleanup(bool cleanOnlyUnused);
    void markUnused();
    void reset() { head = NULL; }
};

class LifoList {
public:
    inline LifoList();
    inline void push(Block *block);
    inline Block *pop();
    inline Block *grab();

private:
    std::atomic<Block*> top;
    MallocMutex lock;
};

/*
 * When a block that is not completely free is returned for reuse by other threads
 * this is where the block goes.
 *
 * LifoList assumes zero initialization; so below its constructors are omitted,
 * to avoid linking with C++ libraries on Linux.
 */

class OrphanedBlocks {
    LifoList bins[numBlockBinLimit];
public:
    Block *get(TLSData *tls, unsigned int size);
    void put(intptr_t binTag, Block *block);
    void reset();
    bool cleanup(Backend* backend);
};

/* Large objects entities */
#include "large_objects.h"

// select index size for BackRefMaster based on word size: default is uint32_t,
// uint16_t for 32-bit platforms
template<bool>
struct MasterIndexSelect {
    typedef uint32_t master_type;
};

template<>
struct MasterIndexSelect<false> {
    typedef uint16_t master_type;
};

class BackRefIdx { // composite index to backreference array
public:
    typedef MasterIndexSelect<4 < sizeof(uintptr_t)>::master_type master_t;
private:
    static const master_t invalid = ~master_t(0);
    master_t master;      // index in BackRefMaster
    uint16_t largeObj:1;  // is this object "large"?
    uint16_t offset  :15; // offset from beginning of BackRefBlock
public:
    BackRefIdx() : master(invalid), largeObj(0), offset(0) {}
    bool isInvalid() const { return master == invalid; }
    bool isLargeObject() const { return largeObj; }
    master_t getMaster() const { return master; }
    uint16_t getOffset() const { return offset; }

#if __TBB_USE_THREAD_SANITIZER
    friend
    __attribute__((no_sanitize("thread")))
    BackRefIdx dereference(const BackRefIdx* ptr) {
        BackRefIdx idx;
        idx.master = ptr->master;
        idx.largeObj = ptr->largeObj;
        idx.offset = ptr->offset;
        return idx;
    }
#else
    friend
    BackRefIdx dereference(const BackRefIdx* ptr) {
        return *ptr;
    }
#endif

    // only newBackRef can modify BackRefIdx
    static BackRefIdx newBackRef(bool largeObj);
};
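
// Editorial note: the 15-bit offset field bounds a BackRefBlock to 2^15 = 32768
// addressable entries, and the single largeObj bit distinguishes large-object
// backreferences from slab-block ones; master selects the entry in BackRefMaster.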

// Block header is used during block coalescing
// and must be preserved in used blocks.
class BlockI {
#if __clang__
    #pragma clang diagnostic push
    #pragma clang diagnostic ignored "-Wunused-private-field"
#endif
    intptr_t blockState[2];
#if __clang__
    #pragma clang diagnostic pop // "-Wunused-private-field"
#endif
};

struct LargeMemoryBlock : public BlockI {
    MemoryPool       *pool;          // owner pool
    LargeMemoryBlock *next,          // ptrs in list of cached blocks
                     *prev,
    // 2-linked list of pool's large objects
    // Used to destroy backrefs on pool destroy (backrefs are global)
    // and for object releasing during pool reset.
                     *gPrev,
                     *gNext;
    uintptr_t         age;           // age of block while in cache
    size_t            objectSize;    // the size requested by a client
    size_t            unalignedSize; // the size requested from backend
    BackRefIdx        backRefIdx;    // cached here, used copy is in LargeObjectHdr
};

// Classes and methods for backend.cpp
#include "backend.h"

// A TBB allocator mode that can be controlled by the user
// via API/environment variable. Must be placed in zero-initialized memory.
// External synchronization assumed.
// TODO: TBB_VERSION support
class AllocControlledMode {
    intptr_t val;
    bool     setDone;

public:
    intptr_t get() const {
        MALLOC_ASSERT(setDone, ASSERT_TEXT);
        return val;
    }

    // Note: set() can be called before init()
    void set(intptr_t newVal) {
        val = newVal;
        setDone = true;
    }

    bool ready() const {
        return setDone;
    }

    // envName - environment variable to get controlled mode
    void initReadEnv(const char *envName, intptr_t defaultVal) {
        if (!setDone) {
            // unreferenced formal parameter warning
            tbb::detail::suppress_unused_warning(envName);
#if !__TBB_WIN8UI_SUPPORT
            // TODO: use strtol to get the actual value of the environment variable
            const char *envVal = getenv(envName);
            if (envVal && !strcmp(envVal, "1"))
                val = 1;
            else
#endif
                val = defaultVal;
            setDone = true;
        }
    }
};
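
// Illustrative usage sketch (editorial, not part of the allocator): a mode object
// lives in zero-initialized static storage and is lazily bound to an environment
// variable, e.g. the huge-pages switch used below. The instance name is hypothetical.
//
//     static AllocControlledMode hugePagesMode;
//     ...
//     hugePagesMode.initReadEnv("TBB_MALLOC_USE_HUGE_PAGES", /*defaultVal=*/0);
//     if (hugePagesMode.ready() && hugePagesMode.get())
//         { /* huge pages were requested */ }
//
// Because set() may run before initReadEnv(), initReadEnv() only reads the
// environment while setDone is still false, so an explicit user setting wins
// over the environment variable.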

// Page type to be used inside MapMemory.
// Regular (4KB aligned), Huge and Transparent Huge Pages (2MB aligned).
enum PageType {
    REGULAR = 0,
    PREALLOCATED_HUGE_PAGE,
    TRANSPARENT_HUGE_PAGE
};

// init() and printStatus() are called only under the global initialization lock.
// A race is possible between registerAllocation() and registerReleasing();
// the harm is that at most one huge page release is missed (because a failure
// to get a huge page is registered only the 1st time), which is negligible.
// setMode() can also be called concurrently.
// Object must reside in zero-initialized memory.
// TODO: can we check for huge page presence during every 10th mmap() call
// in case a huge page is released by another process?
class HugePagesStatus {
private:
    AllocControlledMode requestedMode; // changed only by user
    // to keep enabled and requestedMode consistent
    MallocMutex setModeLock;
    size_t      pageSize;
    std::atomic<intptr_t> needActualStatusPrint;

    static void doPrintStatus(bool state, const char *stateName) {
        // Under macOS* fprintf/snprintf acquires an internal lock, so when
        // the 1st allocation is done under the lock, we get a deadlock.
        // Do not use fprintf etc. during initialization.
        fputs("TBBmalloc: huge pages\t", stderr);
        if (!state)
            fputs("not ", stderr);
        fputs(stateName, stderr);
        fputs("\n", stderr);
    }

    void parseSystemMemInfo() {
        bool hpAvailable  = false;
        bool thpAvailable = false;
        unsigned long long hugePageSize = 0;

#if __unix__
        // Check huge pages existence
        unsigned long long meminfoHugePagesTotal = 0;

        parseFileItem meminfoItems[] = {
            // Parse system huge page size
            { "Hugepagesize: %llu kB", hugePageSize },
            // Check if there are preallocated huge pages on the system
            // https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
            { "HugePages_Total: %llu", meminfoHugePagesTotal } };

        parseFile</*BUFF_SIZE=*/100>("/proc/meminfo", meminfoItems);

        // Double-check other system information regarding preallocated
        // huge pages if there is no information in /proc/meminfo
        unsigned long long vmHugePagesTotal = 0;

        parseFileItem vmItem[] = { { "%llu", vmHugePagesTotal } };

        // We parse a counter number, it can't be huge
        parseFile</*BUFF_SIZE=*/100>("/proc/sys/vm/nr_hugepages", vmItem);

        if (meminfoHugePagesTotal > 0 || vmHugePagesTotal > 0) {
            MALLOC_ASSERT(hugePageSize != 0, "Huge Page size can't be zero if we found preallocated.");

            // Any non-zero value clearly states that there are preallocated
            // huge pages on the system
            hpAvailable = true;
        }

        // Check if there is transparent huge pages support on the system
        unsigned long long thpPresent = 'n';
        parseFileItem thpItem[] = { { "[alwa%cs] madvise never\n", thpPresent } };
        parseFile</*BUFF_SIZE=*/100>("/sys/kernel/mm/transparent_hugepage/enabled", thpItem);

        if (thpPresent == 'y') {
            MALLOC_ASSERT(hugePageSize != 0, "Huge Page size can't be zero if THP support was detected.");
            thpAvailable = true;
        }
#endif
        MALLOC_ASSERT(!pageSize, "Huge page size can't be set twice. Double initialization.");

        // Initialize object variables
        pageSize       = hugePageSize * 1024; // was read in KB from meminfo
        isHPAvailable  = hpAvailable;
        isTHPAvailable = thpAvailable;
    }

public:

    // System information
    bool isHPAvailable;
    bool isTHPAvailable;

    // User-defined value
    bool isEnabled;

    void init() {
        parseSystemMemInfo();
        MallocMutex::scoped_lock lock(setModeLock);
        requestedMode.initReadEnv("TBB_MALLOC_USE_HUGE_PAGES", 0);
        isEnabled = (isHPAvailable || isTHPAvailable) && requestedMode.get();
    }

    // Can be set from user code at any point.
    // If init() has not been called by then, isEnabled will be false.
    void setMode(intptr_t newVal) {
        MallocMutex::scoped_lock lock(setModeLock);
        requestedMode.set(newVal);
        isEnabled = (isHPAvailable || isTHPAvailable) && newVal;
    }

    void reset() {
        needActualStatusPrint.store(0, std::memory_order_relaxed);
        pageSize = 0;
        isEnabled = isHPAvailable = isTHPAvailable = false;
    }

    // If the memory mapping size is a multiple of the huge page size, some OS
    // kernels can use huge pages transparently. Use this when huge pages are requested.
    size_t getGranularity() const {
        if (requestedMode.ready())
            return requestedMode.get() ? pageSize : 0;
        else
            return HUGE_PAGE_SIZE; // the mode is not yet known; assume typical 2MB huge pages
    }

    void printStatus() {
        doPrintStatus(requestedMode.get(), "requested");
        if (requestedMode.get()) { // report actual status iff requested
            if (pageSize)
                needActualStatusPrint.store(1, std::memory_order_release);
            else
                doPrintStatus(/*state=*/false, "available");
        }
    }
};
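
// Illustrative usage sketch (editorial, not part of the allocator): a caller can
// round mapping requests up to getGranularity() so that huge-page-sized mappings
// become possible when huge pages are requested. The helper name is hypothetical.
//
//     size_t roundToGranularity(const HugePagesStatus &hp, size_t requestSize) {
//         size_t gran = hp.getGranularity();
//         return gran ? ((requestSize + gran - 1) / gran) * gran : requestSize;
//     }
//
// getGranularity() returns 0 when huge pages were not requested, in which case
// the caller keeps the regular, page-based request size.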

class AllLargeBlocksList {
    MallocMutex       largeObjLock;
    LargeMemoryBlock *loHead;
public:
    void add(LargeMemoryBlock *lmb);
    void remove(LargeMemoryBlock *lmb);
    template<bool poolDestroy> void releaseAll(Backend *backend);
};

struct ExtMemoryPool {
    Backend           backend;
    LargeObjectCache  loc;
    AllLocalCaches    allLocalCaches;
    OrphanedBlocks    orphanedBlocks;

    intptr_t          poolId;
    // To find all large objects. Used during user pool destruction,
    // to release all backreferences in large blocks (slab blocks do not have them).
    AllLargeBlocksList lmbList;
    // Callbacks to be used instead of MapMemory/UnmapMemory.
    rawAllocType      rawAlloc;
    rawFreeType       rawFree;
    size_t            granularity;
    bool              keepAllMemory,
                      delayRegsReleasing,
    // TODO: implement fixedPool by calling rawFree on destruction
                      fixedPool;
    TLSKey            tlsPointerKey;  // per-pool TLS key

    bool init(intptr_t poolId, rawAllocType rawAlloc, rawFreeType rawFree,
              size_t granularity, bool keepAllMemory, bool fixedPool);
    bool initTLS();

    // i.e., not the system default pool for scalable_malloc/scalable_free
    bool userPool() const { return rawAlloc; }

    // true if something has been released
    bool softCachesCleanup();
    bool releaseAllLocalCaches();
    bool hardCachesCleanup();
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);
    bool reset() {
        loc.reset();
        allLocalCaches.reset();
        orphanedBlocks.reset();
        bool ret = tlsPointerKey.destroy();
        backend.reset();
        return ret;
    }
    bool destroy() {
        MALLOC_ASSERT(isPoolValid(),
                      "Possible double pool_destroy or heap corruption");
        if (!userPool()) {
            loc.reset();
            allLocalCaches.reset();
        }
        // pthread_key_dtors must be disabled before memory unmapping
        // TODO: race-free solution
        bool ret = tlsPointerKey.destroy();
        if (rawFree || !userPool())
            ret &= backend.destroy();
        // pool is not valid after this point
        granularity = 0;
        return ret;
    }
    void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; }
    inline bool regionsAreReleaseable() const;

    LargeMemoryBlock *mallocLargeObject(MemoryPool *pool, size_t allocationSize);
    void freeLargeObject(LargeMemoryBlock *lmb);
    void freeLargeObjectList(LargeMemoryBlock *head);
#if MALLOC_DEBUG
    // use granularity as a marker for pool validity
    bool isPoolValid() const { return granularity; }
#endif
};
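
// Illustrative lifecycle sketch (editorial, not part of the allocator): a user
// pool supplies its own raw allocation callbacks, while the default pool is
// backed by MapMemory/UnmapMemory. Callback names below are hypothetical.
//
//     ExtMemoryPool pool;   // must reside in zero-initialized memory
//     pool.init(/*poolId=*/42, myRawAlloc, myRawFree,
//               /*granularity=*/2*1024*1024, /*keepAllMemory=*/false, /*fixedPool=*/false);
//     ...
//     pool.softCachesCleanup();   // true if something was released
//     ...
//     pool.destroy();             // granularity becomes 0; the pool is no longer valid
//
// userPool() simply tests rawAlloc, so supplying a non-NULL callback is what makes
// the object a "user pool" for the purposes of destroy() and remap().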

inline bool Backend::inUserPool() const { return extMemPool->userPool(); }

struct LargeObjectHdr {
    LargeMemoryBlock *memoryBlock;
    /* Backreference points to LargeObjectHdr.
       Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */
    BackRefIdx backRefIdx;
};

struct FreeObject {
    FreeObject *next;
};


/******* A helper class to support overriding malloc with scalable_malloc *******/
#if MALLOC_CHECK_RECURSION

class RecursiveMallocCallProtector {
    // pointer to automatic data of the holding thread
    static std::atomic<void*> autoObjPtr;
    static MallocMutex rmc_mutex;
    static std::atomic<pthread_t> owner_thread;
    /* Under FreeBSD 8.0, the 1st call to any pthread function, including pthread_self,
       leads to pthread initialization, which causes malloc calls. As the 1st usage of
       RecursiveMallocCallProtector can happen before pthread is initialized, pthread calls
       can't be used in the 1st instance of RecursiveMallocCallProtector.
       RecursiveMallocCallProtector is used the 1st time in checkInitialization(),
       so there is a guarantee that on the 2nd usage pthread is initialized.
       No such situation has been observed with other supported OSes.
     */
#if __FreeBSD__ || __DragonFly__
    static bool       canUsePthread;
#else
    static const bool canUsePthread = true;
#endif
    /* The variable is modified in checkInitialization,
       so it can be read without memory barriers.
     */
    static bool mallocRecursionDetected;

    MallocMutex::scoped_lock* lock_acquired;
    char scoped_lock_space[sizeof(MallocMutex::scoped_lock)+1];

public:

    RecursiveMallocCallProtector() : lock_acquired(NULL) {
        lock_acquired = new (scoped_lock_space) MallocMutex::scoped_lock( rmc_mutex );
        if (canUsePthread)
            owner_thread.store(pthread_self(), std::memory_order_relaxed);
        autoObjPtr.store(&scoped_lock_space, std::memory_order_relaxed);
    }
    ~RecursiveMallocCallProtector() {
        if (lock_acquired) {
            autoObjPtr.store(nullptr, std::memory_order_relaxed);
            lock_acquired->~scoped_lock();
        }
    }
    static bool sameThreadActive() {
        if (!autoObjPtr.load(std::memory_order_relaxed)) // fast path
            return false;
        // Some thread has an active recursive call protector; check if it is the current one.
        // Exact pthread_self based test
        if (canUsePthread) {
            if (pthread_equal( owner_thread.load(std::memory_order_relaxed), pthread_self() )) {
                mallocRecursionDetected = true;
                return true;
            } else
                return false;
        }
        // Inexact stack size based test
        const uintptr_t threadStackSz = 2*1024*1024;
        int dummy;

        uintptr_t xi = (uintptr_t)autoObjPtr.load(std::memory_order_relaxed), yi = (uintptr_t)&dummy;
        uintptr_t diffPtr = xi > yi ? xi - yi : yi - xi;

        return diffPtr < threadStackSz;
    }

    /* The function is called on the 1st scalable_malloc call to check if malloc
       calls scalable_malloc (a nested call must set mallocRecursionDetected). */
    static void detectNaiveOverload() {
        if (!malloc_proxy) {
#if __FreeBSD__ || __DragonFly__
            /* If !canUsePthread, we couldn't call pthread_self() before, but now
               pthread is already initialized, so we can. */
            if (!canUsePthread) {
                canUsePthread = true;
                owner_thread.store(pthread_self(), std::memory_order_relaxed);
            }
#endif
            free(malloc(1));
        }
    }
};
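
// Illustrative usage sketch (editorial, not part of the allocator): code that may
// be re-entered through an overloaded malloc guards itself with an automatic
// protector object and checks sameThreadActive() on entry. Function names below
// are hypothetical.
//
//     void* internalAllocSlowPath(size_t size) {
//         if (RecursiveMallocCallProtector::sameThreadActive())
//             return fallbackAlloc(size);   // malloc re-entered us on this thread
//         RecursiveMallocCallProtector scoped;
//         return doRealAllocation(size);    // may call libc routines that call malloc
//     }
//
// The protector records the owning thread (or, before pthread is usable on
// FreeBSD/DragonFly, an address on its stack), which is exactly what
// sameThreadActive() compares against.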

#else

class RecursiveMallocCallProtector {
public:
    RecursiveMallocCallProtector() {}
    ~RecursiveMallocCallProtector() {}
};

#endif  /* MALLOC_CHECK_RECURSION */

unsigned int getThreadId();

bool initBackRefMaster(Backend *backend);
void destroyBackRefMaster(Backend *backend);
void removeBackRef(BackRefIdx backRefIdx);
void setBackRef(BackRefIdx backRefIdx, void *newPtr);
void *getBackRef(BackRefIdx backRefIdx);

} // namespace internal
} // namespace rml

#endif // __TBB_tbbmalloc_internal_H