1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <atomic>
7 #include <limits>
8 #include <memory>
9 #include <vector>
10
11 #include "base/allocator/partition_allocator/partition_alloc.h"
12 #include "base/allocator/partition_allocator/partition_alloc_check.h"
13 #include "base/allocator/partition_allocator/thread_cache.h"
14 #include "base/bind.h"
15 #include "base/callback.h"
16 #include "base/logging.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/threading/platform_thread.h"
19 #include "base/time/time.h"
20 #include "base/timer/lap_timer.h"
21 #include "build/build_config.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23 #include "testing/perf/perf_result_reporter.h"
24
25 #if defined(OS_ANDROID) || defined(ARCH_CPU_32_BITS)
26 // Some tests allocate many GB of memory, which can cause issues on Android and
27 // address-space exhaustion for any 32-bit process.
28 #define MEMORY_CONSTRAINED
29 #endif
30
31 namespace base {
32 namespace {
33
34 // Change kTimeLimit to something higher if you need more time to capture a
35 // trace.
36 constexpr base::TimeDelta kTimeLimit = base::TimeDelta::FromSeconds(2);
37 constexpr int kWarmupRuns = 10000;
38 constexpr int kTimeCheckInterval = 100000;
39 constexpr size_t kAllocSize = 40;
40
41 // Size constants are mostly arbitrary, but try to simulate something like CSS
42 // parsing which consists of lots of relatively small objects.
43 constexpr int kMultiBucketMinimumSize = 24;
44 constexpr int kMultiBucketIncrement = 13;
45 // Final size is 24 + (13 * 22) = 310 bytes.
46 constexpr int kMultiBucketRounds = 22;
47
48 constexpr char kMetricPrefixMemoryAllocation[] = "MemoryAllocation.";
49 constexpr char kMetricThroughput[] = "throughput";
50 constexpr char kMetricTimePerAllocation[] = "time_per_allocation";
51
SetUpReporter(const std::string & story_name)52 perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) {
53 perf_test::PerfResultReporter reporter(kMetricPrefixMemoryAllocation,
54 story_name);
55 reporter.RegisterImportantMetric(kMetricThroughput, "runs/s");
56 reporter.RegisterImportantMetric(kMetricTimePerAllocation, "ns");
57 return reporter;
58 }
59
60 enum class AllocatorType {
61 kSystem,
62 kPartitionAlloc,
63 kPartitionAllocWithThreadCache
64 };
65
66 class Allocator {
67 public:
68 Allocator() = default;
69 virtual ~Allocator() = default;
70 virtual void* Alloc(size_t size) = 0;
71 virtual void Free(void* data) = 0;
72 };
73
74 class SystemAllocator : public Allocator {
75 public:
76 SystemAllocator() = default;
77 ~SystemAllocator() override = default;
Alloc(size_t size)78 void* Alloc(size_t size) override { return malloc(size); }
Free(void * data)79 void Free(void* data) override { free(data); }
80 };
81
82 class PartitionAllocator : public Allocator {
83 public:
84 PartitionAllocator() = default;
85 ~PartitionAllocator() override = default;
86
Alloc(size_t size)87 void* Alloc(size_t size) override {
88 return alloc_.AllocFlagsNoHooks(0, size);
89 }
Free(void * data)90 void Free(void* data) override { ThreadSafePartitionRoot::FreeNoHooks(data); }
91
92 private:
93 ThreadSafePartitionRoot alloc_{{PartitionOptions::Alignment::kRegular,
94 PartitionOptions::ThreadCache::kDisabled}};
95 };
96
97 // Only one partition with a thread cache.
98 ThreadSafePartitionRoot* g_partition_root = nullptr;
99 class PartitionAllocatorWithThreadCache : public Allocator {
100 public:
PartitionAllocatorWithThreadCache()101 PartitionAllocatorWithThreadCache() {
102 if (!g_partition_root) {
103 g_partition_root = new ThreadSafePartitionRoot(
104 {PartitionOptions::Alignment::kRegular,
105 PartitionOptions::ThreadCache::kEnabled});
106 }
107 internal::ThreadCacheRegistry::Instance().PurgeAll();
108 }
109 ~PartitionAllocatorWithThreadCache() override = default;
110
Alloc(size_t size)111 void* Alloc(size_t size) override {
112 return g_partition_root->AllocFlagsNoHooks(0, size);
113 }
Free(void * data)114 void Free(void* data) override { ThreadSafePartitionRoot::FreeNoHooks(data); }
115 };
116
117 class TestLoopThread : public PlatformThread::Delegate {
118 public:
TestLoopThread(OnceCallback<float ()> test_fn)119 explicit TestLoopThread(OnceCallback<float()> test_fn)
120 : test_fn_(std::move(test_fn)) {
121 PA_CHECK(PlatformThread::Create(0, this, &thread_handle_));
122 }
123
Run()124 float Run() {
125 PlatformThread::Join(thread_handle_);
126 return laps_per_second_;
127 }
128
ThreadMain()129 void ThreadMain() override { laps_per_second_ = std::move(test_fn_).Run(); }
130
131 OnceCallback<float()> test_fn_;
132 PlatformThreadHandle thread_handle_;
133 std::atomic<float> laps_per_second_;
134 };
135
DisplayResults(const std::string & story_name,float iterations_per_second)136 void DisplayResults(const std::string& story_name,
137 float iterations_per_second) {
138 auto reporter = SetUpReporter(story_name);
139 reporter.AddResult(kMetricThroughput, iterations_per_second);
140 reporter.AddResult(kMetricTimePerAllocation,
141 static_cast<size_t>(1e9 / iterations_per_second));
142 }
143
144 class MemoryAllocationPerfNode {
145 public:
GetNext() const146 MemoryAllocationPerfNode* GetNext() const { return next_; }
SetNext(MemoryAllocationPerfNode * p)147 void SetNext(MemoryAllocationPerfNode* p) { next_ = p; }
FreeAll(MemoryAllocationPerfNode * first,Allocator * alloc)148 static void FreeAll(MemoryAllocationPerfNode* first, Allocator* alloc) {
149 MemoryAllocationPerfNode* cur = first;
150 while (cur != nullptr) {
151 MemoryAllocationPerfNode* next = cur->GetNext();
152 alloc->Free(cur);
153 cur = next;
154 }
155 }
156
157 private:
158 MemoryAllocationPerfNode* next_ = nullptr;
159 };
160
161 #if !defined(MEMORY_CONSTRAINED)
SingleBucket(Allocator * allocator)162 float SingleBucket(Allocator* allocator) {
163 auto* first =
164 reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
165 size_t allocated_memory = kAllocSize;
166
167 LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
168 MemoryAllocationPerfNode* cur = first;
169 do {
170 auto* next = reinterpret_cast<MemoryAllocationPerfNode*>(
171 allocator->Alloc(kAllocSize));
172 CHECK_NE(next, nullptr);
173 cur->SetNext(next);
174 cur = next;
175 timer.NextLap();
176 allocated_memory += kAllocSize;
177 // With multiple threads, can get OOM otherwise.
178 if (allocated_memory > 200e6) {
179 cur->SetNext(nullptr);
180 MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
181 cur = first;
182 allocated_memory = kAllocSize;
183 }
184 } while (!timer.HasTimeLimitExpired());
185
186 // next_ = nullptr only works if the class constructor is called (it's not
187 // called in this case because then we can allocate arbitrary-length
188 // payloads.)
189 cur->SetNext(nullptr);
190 MemoryAllocationPerfNode::FreeAll(first, allocator);
191
192 return timer.LapsPerSecond();
193 }
194 #endif // defined(MEMORY_CONSTRAINED)
195
SingleBucketWithFree(Allocator * allocator)196 float SingleBucketWithFree(Allocator* allocator) {
197 // Allocate an initial element to make sure the bucket stays set up.
198 void* elem = allocator->Alloc(kAllocSize);
199
200 LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
201 do {
202 void* cur = allocator->Alloc(kAllocSize);
203 CHECK_NE(cur, nullptr);
204 allocator->Free(cur);
205 timer.NextLap();
206 } while (!timer.HasTimeLimitExpired());
207
208 allocator->Free(elem);
209 return timer.LapsPerSecond();
210 }
211
212 #if !defined(MEMORY_CONSTRAINED)
MultiBucket(Allocator * allocator)213 float MultiBucket(Allocator* allocator) {
214 auto* first =
215 reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
216 MemoryAllocationPerfNode* cur = first;
217 size_t allocated_memory = kAllocSize;
218
219 LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
220 do {
221 for (int i = 0; i < kMultiBucketRounds; i++) {
222 size_t size = kMultiBucketMinimumSize + (i * kMultiBucketIncrement);
223 auto* next =
224 reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(size));
225 CHECK_NE(next, nullptr);
226 cur->SetNext(next);
227 cur = next;
228 allocated_memory += size;
229 }
230
231 // Can OOM with multiple threads.
232 if (allocated_memory > 100e6) {
233 cur->SetNext(nullptr);
234 MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
235 cur = first;
236 allocated_memory = kAllocSize;
237 }
238
239 timer.NextLap();
240 } while (!timer.HasTimeLimitExpired());
241
242 cur->SetNext(nullptr);
243 MemoryAllocationPerfNode::FreeAll(first, allocator);
244
245 return timer.LapsPerSecond() * kMultiBucketRounds;
246 }
247 #endif // defined(MEMORY_CONSTRAINED)
248
MultiBucketWithFree(Allocator * allocator)249 float MultiBucketWithFree(Allocator* allocator) {
250 std::vector<void*> elems;
251 elems.reserve(kMultiBucketRounds);
252 // Do an initial round of allocation to make sure that the buckets stay in
253 // use (and aren't accidentally released back to the OS).
254 for (int i = 0; i < kMultiBucketRounds; i++) {
255 void* cur =
256 allocator->Alloc(kMultiBucketMinimumSize + (i * kMultiBucketIncrement));
257 CHECK_NE(cur, nullptr);
258 elems.push_back(cur);
259 }
260
261 LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
262 do {
263 for (int i = 0; i < kMultiBucketRounds; i++) {
264 void* cur = allocator->Alloc(kMultiBucketMinimumSize +
265 (i * kMultiBucketIncrement));
266 CHECK_NE(cur, nullptr);
267 allocator->Free(cur);
268 }
269 timer.NextLap();
270 } while (!timer.HasTimeLimitExpired());
271
272 for (void* ptr : elems) {
273 allocator->Free(ptr);
274 }
275
276 return timer.LapsPerSecond() * kMultiBucketRounds;
277 }
278
CreateAllocator(AllocatorType type)279 std::unique_ptr<Allocator> CreateAllocator(AllocatorType type) {
280 switch (type) {
281 case AllocatorType::kSystem:
282 return std::make_unique<SystemAllocator>();
283 case AllocatorType::kPartitionAlloc:
284 return std::make_unique<PartitionAllocator>();
285 case AllocatorType::kPartitionAllocWithThreadCache:
286 return std::make_unique<PartitionAllocatorWithThreadCache>();
287 }
288 }
289
LogResults(int thread_count,AllocatorType alloc_type,uint64_t total_laps_per_second,uint64_t min_laps_per_second)290 void LogResults(int thread_count,
291 AllocatorType alloc_type,
292 uint64_t total_laps_per_second,
293 uint64_t min_laps_per_second) {
294 LOG(INFO) << "RESULTSCSV: " << thread_count << ","
295 << static_cast<int>(alloc_type) << "," << total_laps_per_second
296 << "," << min_laps_per_second;
297 }
298
RunTest(int thread_count,AllocatorType alloc_type,float (* test_fn)(Allocator *),const char * story_base_name)299 void RunTest(int thread_count,
300 AllocatorType alloc_type,
301 float (*test_fn)(Allocator*),
302 const char* story_base_name) {
303 auto alloc = CreateAllocator(alloc_type);
304
305 std::vector<std::unique_ptr<TestLoopThread>> threads;
306 for (int i = 0; i < thread_count; ++i) {
307 threads.push_back(std::make_unique<TestLoopThread>(
308 BindOnce(test_fn, Unretained(alloc.get()))));
309 }
310
311 uint64_t total_laps_per_second = 0;
312 uint64_t min_laps_per_second = std::numeric_limits<uint64_t>::max();
313 for (int i = 0; i < thread_count; ++i) {
314 uint64_t laps_per_second = threads[i]->Run();
315 min_laps_per_second = std::min(laps_per_second, min_laps_per_second);
316 total_laps_per_second += laps_per_second;
317 }
318
319 char const* alloc_type_str;
320 switch (alloc_type) {
321 case AllocatorType::kSystem:
322 alloc_type_str = "System";
323 break;
324 case AllocatorType::kPartitionAlloc:
325 alloc_type_str = "PartitionAlloc";
326 break;
327 case AllocatorType::kPartitionAllocWithThreadCache:
328 alloc_type_str = "PartitionAllocWithThreadCache";
329 break;
330 }
331
332 std::string name =
333 base::StringPrintf("%s%s_%s_%d", kMetricPrefixMemoryAllocation,
334 story_base_name, alloc_type_str, thread_count);
335
336 DisplayResults(name + "_total", total_laps_per_second);
337 DisplayResults(name + "_worst", min_laps_per_second);
338 LogResults(thread_count, alloc_type, total_laps_per_second,
339 min_laps_per_second);
340 }
341
342 class MemoryAllocationPerfTest
343 : public testing::TestWithParam<std::tuple<int, AllocatorType>> {};
344
345 // Only one partition with a thread cache: cannot use the thread cache when
346 // PartitionAlloc is malloc().
347 INSTANTIATE_TEST_SUITE_P(
348 ,
349 MemoryAllocationPerfTest,
350 ::testing::Combine(
351 ::testing::Values(1, 2, 3, 4),
352 ::testing::Values(AllocatorType::kSystem,
353 AllocatorType::kPartitionAlloc
354 #if !BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
355 ,
356 AllocatorType::kPartitionAllocWithThreadCache
357 #endif
358 )));
359
360 // This test (and the other one below) allocates a large amount of memory, which
361 // can cause issues on Android.
362 #if !defined(MEMORY_CONSTRAINED)
TEST_P(MemoryAllocationPerfTest,SingleBucket)363 TEST_P(MemoryAllocationPerfTest, SingleBucket) {
364 auto params = GetParam();
365 RunTest(std::get<0>(params), std::get<1>(params), SingleBucket,
366 "SingleBucket");
367 }
368 #endif // defined(MEMORY_CONSTRAINED)
369
TEST_P(MemoryAllocationPerfTest,SingleBucketWithFree)370 TEST_P(MemoryAllocationPerfTest, SingleBucketWithFree) {
371 auto params = GetParam();
372 RunTest(std::get<0>(params), std::get<1>(params), SingleBucketWithFree,
373 "SingleBucketWithFree");
374 }
375
376 #if !defined(MEMORY_CONSTRAINED)
TEST_P(MemoryAllocationPerfTest,MultiBucket)377 TEST_P(MemoryAllocationPerfTest, MultiBucket) {
378 auto params = GetParam();
379 RunTest(std::get<0>(params), std::get<1>(params), MultiBucket, "MultiBucket");
380 }
381 #endif // defined(MEMORY_CONSTRAINED)
382
TEST_P(MemoryAllocationPerfTest,MultiBucketWithFree)383 TEST_P(MemoryAllocationPerfTest, MultiBucketWithFree) {
384 auto params = GetParam();
385 RunTest(std::get<0>(params), std::get<1>(params), MultiBucketWithFree,
386 "MultiBucketWithFree");
387 }
388
389 } // namespace
390
391 } // namespace base
392