1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_
6 #define BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_
7 
8 #include <atomic>
9 #include <memory>
10 
11 #include "base/allocator/partition_allocator/partition_alloc_forward.h"
12 #include "base/allocator/partition_allocator/partition_cookie.h"
13 #include "base/allocator/partition_allocator/partition_freelist_entry.h"
14 #include "base/allocator/partition_allocator/partition_lock.h"
15 #include "base/allocator/partition_allocator/partition_stats.h"
16 #include "base/allocator/partition_allocator/partition_tls.h"
17 #include "base/base_export.h"
18 #include "base/gtest_prod_util.h"
19 #include "base/no_destructor.h"
20 #include "base/partition_alloc_buildflags.h"
21 #include "base/synchronization/lock.h"
22 
23 // Need TLS support.
24 #if defined(OS_POSIX) || defined(OS_WIN)
25 #define PA_THREAD_CACHE_SUPPORTED
26 #endif
27 
28 namespace base {
29 
30 namespace internal {
31 
32 class ThreadCache;
33 
34 extern BASE_EXPORT PartitionTlsKey g_thread_cache_key;
35 
36 // Global registry of all ThreadCache instances.
37 //
38 // This class cannot allocate in the (Un)registerThreadCache() functions, as
39 // they are called from ThreadCache constructor, which is from within the
40 // allocator. However the other members can allocate.
class BASE_EXPORT ThreadCacheRegistry {
 public:
  static ThreadCacheRegistry& Instance();
  // Do not instantiate.
  //
  // Several things are surprising here:
  // - The constructor is public even though this is intended to be a singleton:
  //   we cannot use a "static local" variable in |Instance()| as this is
  //   reached too early during CRT initialization on Windows, meaning that
  //   static local variables don't work (as they call into the uninitialized
  //   runtime). To sidestep that, we use a regular global variable in the .cc,
  //   which is fine as this object's constructor is constexpr.
  // - Marked inline so that the chromium style plugin doesn't complain that a
  //   "complex constructor" has an inline body. This warning is disabled when
  //   the constructor is explicitly marked "inline". Note that this is a false
  //   positive of the plugin, since constexpr implies inline.
  inline constexpr ThreadCacheRegistry();

  // Adds |cache| to / removes |cache| from the global list. Called from the
  // ThreadCache constructor/destructor, hence must not allocate (see class
  // comment above).
  void RegisterThreadCache(ThreadCache* cache);
  void UnregisterThreadCache(ThreadCache* cache);
  // Prints statistics for all thread caches, or this thread's only.
  void DumpStats(bool my_thread_only, ThreadCacheStats* stats);
  // Purge() this thread's cache, and asks the other ones to trigger Purge() at
  // a later point (during a deallocation).
  void PurgeAll();

  // Lock protecting the registry's intrusive list. Exposed so that list
  // membership can be annotated with GUARDED_BY() below and in ThreadCache.
  static PartitionLock& GetLock() { return Instance().lock_; }

 private:
  friend class NoDestructor<ThreadCacheRegistry>;
  // Not using base::Lock as the object's constructor must be constexpr.
  PartitionLock lock_;
  // Head of the intrusive list of all registered caches (linked through
  // ThreadCache::next_/prev_).
  ThreadCache* list_head_ GUARDED_BY(GetLock()) = nullptr;
};

// Out-of-line definition; must remain constexpr so that the global instance
// in the .cc file gets constant initialization (see comment above).
constexpr ThreadCacheRegistry::ThreadCacheRegistry() = default;
77 
// Optional statistics collection.
#if DCHECK_IS_ON()
#define PA_ENABLE_THREAD_CACHE_STATISTICS 1
#endif

#if defined(PA_ENABLE_THREAD_CACHE_STATISTICS)
#define INCREMENT_COUNTER(counter) ++counter
#define GET_COUNTER(counter) counter
#else
// No-op variants: statistics compile away entirely when disabled. The
// do/while(0) form keeps INCREMENT_COUNTER(x); valid as a statement.
#define INCREMENT_COUNTER(counter) \
  do {                             \
  } while (0)
#define GET_COUNTER(counter) 0
#endif  // defined(PA_ENABLE_THREAD_CACHE_STATISTICS)
92 
ConstexprLog2(size_t n)93 ALWAYS_INLINE static constexpr int ConstexprLog2(size_t n) {
94   return n < 1 ? -1 : (n < 2 ? 0 : (1 + ConstexprLog2(n >> 1)));
95 }
96 
97 // Per-thread cache. *Not* threadsafe, must only be accessed from a single
98 // thread.
99 //
100 // In practice, this is easily enforced as long as only |instance| is
101 // manipulated, as it is a thread_local member. As such, any
102 // |ThreadCache::instance->*()| call will necessarily be done from a single
103 // thread.
class BASE_EXPORT ThreadCache {
 public:
  // Initializes the thread cache for |root|. May allocate, so should be called
  // with the thread cache disabled on the partition side, and without the
  // partition lock held.
  //
  // May only be called by a single PartitionRoot.
  static void Init(PartitionRoot<ThreadSafe>* root);
  // Thread caches are only supported for thread-safe partitions.
  static void Init(PartitionRoot<NotThreadSafe>* root) { IMMEDIATE_CRASH(); }

  // Returns the cache for the current thread, read from TLS. May be nullptr
  // if no cache has been created for this thread yet.
  static ThreadCache* Get() {
    return reinterpret_cast<ThreadCache*>(PartitionTlsGet(g_thread_cache_key));
  }

  // Create a new ThreadCache associated with |root|.
  // Must be called without the partition locked, as this may allocate.
  static ThreadCache* Create(PartitionRoot<ThreadSafe>* root);
  static ThreadCache* Create(PartitionRoot<NotThreadSafe>* root) {
    IMMEDIATE_CRASH();
  }

  ~ThreadCache();

  // Force placement new.
  void* operator new(size_t) = delete;
  void* operator new(size_t, void* buffer) { return buffer; }
  void operator delete(void* ptr) = delete;
  ThreadCache(const ThreadCache&) = delete;
  ThreadCache(const ThreadCache&&) = delete;
  ThreadCache& operator=(const ThreadCache&) = delete;

  // Tries to put a memory block at |address| into the cache.
  // The block comes from the bucket at index |bucket_index| from the partition
  // this cache is for.
  //
  // Returns true if the memory was put in the cache, and false otherwise. This
  // can happen either because the cache is full or the allocation was too
  // large.
  ALWAYS_INLINE bool MaybePutInCache(void* address, size_t bucket_index);

  // Tries to allocate memory from the cache.
  // Returns nullptr for failure.
  //
  // Has the same behavior as RawAlloc(), that is: no cookie nor tag handling.
  ALWAYS_INLINE void* GetFromCache(size_t bucket_index);

  // Asks this cache to trigger |Purge()| at a later point. Can be called from
  // any thread.
  void SetShouldPurge();
  // Empties the cache.
  // The Partition lock must *not* be held when calling this.
  // Must be called from the thread this cache is for.
  void Purge();
  void AccumulateStats(ThreadCacheStats* stats) const;

  size_t bucket_count_for_testing(size_t index) const {
    return buckets_[index].count;
  }

 private:
  explicit ThreadCache(PartitionRoot<ThreadSafe>* root);

  // Per-bucket singly-linked freelist of cached allocations.
  struct Bucket {
    // Number of entries currently in the |freelist_head| list.
    size_t count;
    PartitionFreelistEntry* freelist_head;
  };

  // Only allocations up to this size are cached.
  // TODO(lizeb): Optimize the threshold.
  static constexpr size_t kSizeThreshold = 512;
  // Number of allocator buckets needed to cover all sizes up to
  // |kSizeThreshold|, given the allocator's bucketing scheme.
  static constexpr size_t kBucketCount =
      ((ConstexprLog2(kSizeThreshold) - kMinBucketedOrder + 1)
       << kNumBucketsPerOrderBits) +
      1;
  static_assert(
      kBucketCount < kNumBuckets,
      "Cannot have more cached buckets than what the allocator supports");
  // Maximum number of entries cached per bucket.
  // TODO(lizeb): Tune this constant, and adapt it to the bucket size /
  // allocation patterns.
  static constexpr size_t kMaxCountPerBucket = 100;

  // Set from any thread by SetShouldPurge(); checked (relaxed) in
  // MaybePutInCache() on this thread.
  std::atomic<bool> should_purge_;
  Bucket buckets_[kBucketCount];
  ThreadCacheStats stats_;
  PartitionRoot<ThreadSafe>* root_;

  // Intrusive list since ThreadCacheRegistry::RegisterThreadCache() cannot
  // allocate.
  ThreadCache* next_ GUARDED_BY(ThreadCacheRegistry::GetLock());
  ThreadCache* prev_ GUARDED_BY(ThreadCacheRegistry::GetLock());

  friend class ThreadCacheRegistry;
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, LargeAllocationsAreNotCached);
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, MultipleThreadCaches);
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, RecordStats);
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, ThreadCacheRegistry);
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, MultipleThreadCachesAccounting);
};
201 
MaybePutInCache(void * address,size_t bucket_index)202 ALWAYS_INLINE bool ThreadCache::MaybePutInCache(void* address,
203                                                 size_t bucket_index) {
204   if (UNLIKELY(should_purge_.load(std::memory_order_relaxed)))
205     Purge();
206 
207   INCREMENT_COUNTER(stats_.cache_fill_count);
208 
209   if (bucket_index >= kBucketCount) {
210     INCREMENT_COUNTER(stats_.cache_fill_too_large);
211     INCREMENT_COUNTER(stats_.cache_fill_misses);
212     return false;
213   }
214 
215   auto& bucket = buckets_[bucket_index];
216 
217   if (bucket.count >= kMaxCountPerBucket) {
218     INCREMENT_COUNTER(stats_.cache_fill_bucket_full);
219     INCREMENT_COUNTER(stats_.cache_fill_misses);
220     return false;
221   }
222 
223   PA_DCHECK(bucket.count != 0 || bucket.freelist_head == nullptr);
224 
225   auto* entry = reinterpret_cast<PartitionFreelistEntry*>(address);
226   entry->SetNextForThreadCache(bucket.freelist_head);
227   bucket.freelist_head = entry;
228   bucket.count++;
229 
230   INCREMENT_COUNTER(stats_.cache_fill_hits);
231   return true;
232 }
233 
GetFromCache(size_t bucket_index)234 ALWAYS_INLINE void* ThreadCache::GetFromCache(size_t bucket_index) {
235   INCREMENT_COUNTER(stats_.alloc_count);
236   // Only handle "small" allocations.
237   if (bucket_index >= kBucketCount) {
238     INCREMENT_COUNTER(stats_.alloc_miss_too_large);
239     INCREMENT_COUNTER(stats_.alloc_misses);
240     return nullptr;
241   }
242 
243   auto& bucket = buckets_[bucket_index];
244   auto* result = bucket.freelist_head;
245   if (!result) {
246     PA_DCHECK(bucket.count == 0);
247     INCREMENT_COUNTER(stats_.alloc_miss_empty);
248     INCREMENT_COUNTER(stats_.alloc_misses);
249     return nullptr;
250   }
251 
252   PA_DCHECK(bucket.count != 0);
253   auto* next = result->GetNext();
254   PA_DCHECK(result != next);
255   bucket.count--;
256   PA_DCHECK(bucket.count != 0 || !next);
257   bucket.freelist_head = next;
258 
259   INCREMENT_COUNTER(stats_.alloc_hits);
260   return result;
261 }
262 
263 }  // namespace internal
264 }  // namespace base
265 
266 #endif  // BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_
267