1 //===-- tsd_shared.h --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef SCUDO_TSD_SHARED_H_
10 #define SCUDO_TSD_SHARED_H_
11 
12 #include "tsd.h"
13 
14 #if SCUDO_HAS_PLATFORM_TLS_SLOT
15 // This is a platform-provided header that needs to be on the include path when
16 // Scudo is compiled. It must declare a function with the prototype:
17 //   uintptr_t *getPlatformAllocatorTlsSlot()
18 // that returns the address of a thread-local word of storage reserved for
19 // Scudo, that must be zero-initialized in newly created threads.
20 #include "scudo_platform_tls_slot.h"
21 #endif
22 
23 namespace scudo {
24 
// Registry sharing a fixed pool of TSDsArraySize Thread Specific Data
// structures between all threads. Each thread is associated with one TSD
// through a per-thread word of storage (see getTlsPtr); on lock contention a
// thread may migrate to a less contended TSD (see getTSDAndLockSlow).
// The low bit of the per-thread word doubles as the thread's
// "disable memory init" flag, which is why TSD<Allocator> must be at least
// 2-byte aligned (enforced by the static_assert below).
template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount>
struct TSDRegistrySharedT {
  // Initializes the allocator and every TSD in the pool, then sets the number
  // of active TSDs to the number of CPUs capped at DefaultTSDCount, falling
  // back to DefaultTSDCount when the CPU count cannot be determined (0).
  void init(Allocator *Instance) {
    DCHECK(!Initialized);
    Instance->init();
    for (u32 I = 0; I < TSDsArraySize; I++)
      TSDs[I].init(Instance);
    const u32 NumberOfCPUs = getNumberOfCPUs();
    setNumberOfTSDs((NumberOfCPUs == 0) ? DefaultTSDCount
                                        : Min(NumberOfCPUs, DefaultTSDCount));
    Initialized = true;
  }

  // Thread-safe wrapper around init(): runs it at most once, under Mutex.
  void initOnceMaybe(Allocator *Instance) {
    ScopedLock L(Mutex);
    if (LIKELY(Initialized))
      return;
    init(Instance); // Sets Initialized.
  }

  // Testing only: releases the cached resources of every TSD, resets them to
  // a default-constructed state, and clears this thread's TSD association so
  // the registry can be re-initialized.
  void unmapTestOnly(Allocator *Instance) {
    for (u32 I = 0; I < TSDsArraySize; I++) {
      TSDs[I].commitBack(Instance);
      TSDs[I] = {};
    }
    setCurrentTSD(nullptr);
    Initialized = false;
  }

  // Fast-path check run on every allocation: if the calling thread already
  // has a TSD assigned, do nothing; otherwise take the slow initialization
  // path. MinimalInit is only meaningful for the exclusive registry and is
  // ignored here.
  ALWAYS_INLINE void initThreadMaybe(Allocator *Instance,
                                     UNUSED bool MinimalInit) {
    if (LIKELY(getCurrentTSD()))
      return;
    initThread(Instance);
  }

  // Returns a locked TSD for the calling thread. *UnlockRequired is always
  // set to true for the shared registry: the caller must unlock the returned
  // TSD when done with it.
  ALWAYS_INLINE TSD<Allocator> *getTSDAndLock(bool *UnlockRequired) {
    TSD<Allocator> *TSD = getCurrentTSD();
    DCHECK(TSD);
    *UnlockRequired = true;
    // Try to lock the currently associated context.
    if (TSD->tryLock())
      return TSD;
    // If that fails, go down the slow path.
    if (TSDsArraySize == 1U) {
      // Only 1 TSD, no need to go any further.
      // The compiler will optimize this one way or the other.
      TSD->lock();
      return TSD;
    }
    return getTSDAndLockSlow(TSD);
  }

  // Locks the whole registry (e.g. around fork()). Lock order: Mutex first,
  // then every TSD in ascending index order; enable() must release in the
  // exact reverse order to avoid deadlock.
  void disable() {
    Mutex.lock();
    for (u32 I = 0; I < TSDsArraySize; I++)
      TSDs[I].lock();
  }

  // Releases all the locks taken by disable(), in reverse order.
  void enable() {
    for (s32 I = static_cast<s32>(TSDsArraySize - 1); I >= 0; I--)
      TSDs[I].unlock();
    Mutex.unlock();
  }

  // Handles the registry-level runtime options. MaxTSDsCount grows the
  // active TSD count (it cannot shrink it, see setNumberOfTSDs);
  // ThreadDisableMemInit toggles the calling thread's mem-init flag.
  // Unknown options are not an error and report success.
  bool setOption(Option O, sptr Value) {
    if (O == Option::MaxTSDsCount)
      return setNumberOfTSDs(static_cast<u32>(Value));
    if (O == Option::ThreadDisableMemInit)
      setDisableMemInit(Value);
    // Not supported by the TSD Registry, but not an error either.
    return true;
  }

  // Reads the calling thread's "disable memory init" flag, stored in the low
  // bit of the per-thread TLS word.
  bool getDisableMemInit() const { return *getTlsPtr() & 1; }

private:
  // Returns the address of the per-thread word used to store the current TSD
  // pointer (upper bits) and the disable-mem-init flag (low bit). Uses the
  // platform-provided TLS slot when available, otherwise a C++ thread_local.
  ALWAYS_INLINE uptr *getTlsPtr() const {
#if SCUDO_HAS_PLATFORM_TLS_SLOT
    return reinterpret_cast<uptr *>(getPlatformAllocatorTlsSlot());
#else
    static thread_local uptr ThreadTSD;
    return &ThreadTSD;
#endif
  }

  // The low bit of the TLS word is reused as a flag, so TSD pointers must
  // have their low bit clear.
  static_assert(alignof(TSD<Allocator>) >= 2, "");

  // Stores CurrentTSD in the TLS word while preserving the low flag bit.
  ALWAYS_INLINE void setCurrentTSD(TSD<Allocator> *CurrentTSD) {
    *getTlsPtr() &= 1;
    *getTlsPtr() |= reinterpret_cast<uptr>(CurrentTSD);
  }

  // Retrieves the TSD pointer from the TLS word, masking out the flag bit.
  ALWAYS_INLINE TSD<Allocator> *getCurrentTSD() {
    return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr() & ~1ULL);
  }

  // Sets the number of active TSDs to N, clamped to TSDsArraySize. The count
  // can only grow: shrinking is refused (returns false) because threads may
  // already be associated with higher-index TSDs. Also precomputes the
  // coprimes of N used by getTSDAndLockSlow for its random walk.
  bool setNumberOfTSDs(u32 N) {
    ScopedLock L(MutexTSDs);
    if (N < NumberOfTSDs)
      return false;
    if (N > TSDsArraySize)
      N = TSDsArraySize;
    NumberOfTSDs = N;
    NumberOfCoPrimes = 0;
    // Compute all the coprimes of NumberOfTSDs. This will be used to walk the
    // array of TSDs in a random order. For details, see:
    // https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/
    for (u32 I = 0; I < N; I++) {
      u32 A = I + 1;
      u32 B = N;
      // Find the GCD between I + 1 and N. If 1, they are coprimes.
      while (B != 0) {
        const u32 T = A;
        A = B;
        B = T % B;
      }
      if (A == 1)
        CoPrimes[NumberOfCoPrimes++] = I + 1;
    }
    return true;
  }

  // Writes the calling thread's disable-mem-init flag into the low bit of
  // the TLS word, preserving the TSD pointer stored in the upper bits.
  void setDisableMemInit(bool B) {
    *getTlsPtr() &= ~1ULL;
    *getTlsPtr() |= B;
  }

  // Slow-path thread initialization: makes sure the registry is initialized,
  // then assigns this thread a TSD.
  NOINLINE void initThread(Allocator *Instance) {
    initOnceMaybe(Instance);
    // Initial context assignment is done in a plain round-robin fashion.
    const u32 Index = atomic_fetch_add(&CurrentIndex, 1U, memory_order_relaxed);
    setCurrentTSD(&TSDs[Index % NumberOfTSDs]);
    Instance->callPostInitCallback();
  }

  // Contended path of getTSDAndLock: probes up to 4 TSDs in a pseudo-random
  // order (stepping by a coprime of NumberOfTSDs so every index is reachable)
  // looking for an unlocked one. If none is free, blocks on the probed TSD
  // with the lowest contention (Precedence), or as a last resort on the
  // thread's current TSD. On success the thread re-associates with the TSD
  // it obtained.
  NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD) {
    // Use the Precedence of the current TSD as our random seed. Since we are
    // in the slow path, it means that tryLock failed, and as a result it's
    // very likely that said Precedence is non-zero.
    const u32 R = static_cast<u32>(CurrentTSD->getPrecedence());
    u32 N, Inc;
    {
      // Snapshot the TSD count and pick a stride under MutexTSDs so we never
      // race with setNumberOfTSDs.
      ScopedLock L(MutexTSDs);
      N = NumberOfTSDs;
      DCHECK_NE(NumberOfCoPrimes, 0U);
      Inc = CoPrimes[R % NumberOfCoPrimes];
    }
    if (N > 1U) {
      u32 Index = R % N;
      uptr LowestPrecedence = UINTPTR_MAX;
      TSD<Allocator> *CandidateTSD = nullptr;
      // Go randomly through at most 4 contexts and find a candidate.
      for (u32 I = 0; I < Min(4U, N); I++) {
        if (TSDs[Index].tryLock()) {
          setCurrentTSD(&TSDs[Index]);
          return &TSDs[Index];
        }
        const uptr Precedence = TSDs[Index].getPrecedence();
        // A 0 precedence here means another thread just locked this TSD.
        if (Precedence && Precedence < LowestPrecedence) {
          CandidateTSD = &TSDs[Index];
          LowestPrecedence = Precedence;
        }
        Index += Inc;
        if (Index >= N)
          Index -= N;
      }
      if (CandidateTSD) {
        CandidateTSD->lock();
        setCurrentTSD(CandidateTSD);
        return CandidateTSD;
      }
    }
    // Last resort, stick with the current one.
    CurrentTSD->lock();
    return CurrentTSD;
  }

  atomic_u32 CurrentIndex = {};
  u32 NumberOfTSDs = 0;        // Active TSDs; only the first NumberOfTSDs are assigned.
  u32 NumberOfCoPrimes = 0;    // Valid entries in CoPrimes.
  u32 CoPrimes[TSDsArraySize] = {}; // Coprimes of NumberOfTSDs, for the random walk.
  bool Initialized = false;    // Guarded by Mutex.
  HybridMutex Mutex;           // Protects one-time init; held across disable()/enable().
  HybridMutex MutexTSDs;       // Protects NumberOfTSDs/NumberOfCoPrimes/CoPrimes.
  TSD<Allocator> TSDs[TSDsArraySize];
};
213 
214 } // namespace scudo
215 
216 #endif // SCUDO_TSD_SHARED_H_
217