1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <atomic>
7 #include <limits>
8 #include <memory>
9 #include <vector>
10 
11 #include "base/allocator/partition_allocator/partition_alloc.h"
12 #include "base/allocator/partition_allocator/partition_alloc_check.h"
13 #include "base/allocator/partition_allocator/thread_cache.h"
14 #include "base/bind.h"
15 #include "base/callback.h"
16 #include "base/logging.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/threading/platform_thread.h"
19 #include "base/time/time.h"
20 #include "base/timer/lap_timer.h"
21 #include "build/build_config.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23 #include "testing/perf/perf_result_reporter.h"
24 
25 #if defined(OS_ANDROID) || defined(ARCH_CPU_32_BITS)
26 // Some tests allocate many GB of memory, which can cause issues on Android and
27 // address-space exhaustion for any 32-bit process.
28 #define MEMORY_CONSTRAINED
29 #endif
30 
31 namespace base {
32 namespace {
33 
34 // Change kTimeLimit to something higher if you need more time to capture a
35 // trace.
36 constexpr base::TimeDelta kTimeLimit = base::TimeDelta::FromSeconds(2);
37 constexpr int kWarmupRuns = 10000;
38 constexpr int kTimeCheckInterval = 100000;
39 constexpr size_t kAllocSize = 40;
40 
41 // Size constants are mostly arbitrary, but try to simulate something like CSS
42 // parsing which consists of lots of relatively small objects.
43 constexpr int kMultiBucketMinimumSize = 24;
44 constexpr int kMultiBucketIncrement = 13;
45 // Final size is 24 + (13 * 22) = 310 bytes.
46 constexpr int kMultiBucketRounds = 22;
47 
48 constexpr char kMetricPrefixMemoryAllocation[] = "MemoryAllocation.";
49 constexpr char kMetricThroughput[] = "throughput";
50 constexpr char kMetricTimePerAllocation[] = "time_per_allocation";
51 
SetUpReporter(const std::string & story_name)52 perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) {
53   perf_test::PerfResultReporter reporter(kMetricPrefixMemoryAllocation,
54                                          story_name);
55   reporter.RegisterImportantMetric(kMetricThroughput, "runs/s");
56   reporter.RegisterImportantMetric(kMetricTimePerAllocation, "ns");
57   return reporter;
58 }
59 
60 enum class AllocatorType {
61   kSystem,
62   kPartitionAlloc,
63   kPartitionAllocWithThreadCache
64 };
65 
66 class Allocator {
67  public:
68   Allocator() = default;
69   virtual ~Allocator() = default;
70   virtual void* Alloc(size_t size) = 0;
71   virtual void Free(void* data) = 0;
72 };
73 
74 class SystemAllocator : public Allocator {
75  public:
76   SystemAllocator() = default;
77   ~SystemAllocator() override = default;
Alloc(size_t size)78   void* Alloc(size_t size) override { return malloc(size); }
Free(void * data)79   void Free(void* data) override { free(data); }
80 };
81 
82 class PartitionAllocator : public Allocator {
83  public:
84   PartitionAllocator() = default;
85   ~PartitionAllocator() override = default;
86 
Alloc(size_t size)87   void* Alloc(size_t size) override {
88     return alloc_.AllocFlagsNoHooks(0, size);
89   }
Free(void * data)90   void Free(void* data) override { ThreadSafePartitionRoot::FreeNoHooks(data); }
91 
92  private:
93   ThreadSafePartitionRoot alloc_{{PartitionOptions::Alignment::kRegular,
94                                   PartitionOptions::ThreadCache::kDisabled}};
95 };
96 
97 // Only one partition with a thread cache.
98 ThreadSafePartitionRoot* g_partition_root = nullptr;
99 class PartitionAllocatorWithThreadCache : public Allocator {
100  public:
PartitionAllocatorWithThreadCache()101   PartitionAllocatorWithThreadCache() {
102     if (!g_partition_root) {
103       g_partition_root = new ThreadSafePartitionRoot(
104           {PartitionOptions::Alignment::kRegular,
105            PartitionOptions::ThreadCache::kEnabled});
106     }
107     internal::ThreadCacheRegistry::Instance().PurgeAll();
108   }
109   ~PartitionAllocatorWithThreadCache() override = default;
110 
Alloc(size_t size)111   void* Alloc(size_t size) override {
112     return g_partition_root->AllocFlagsNoHooks(0, size);
113   }
Free(void * data)114   void Free(void* data) override { ThreadSafePartitionRoot::FreeNoHooks(data); }
115 };
116 
117 class TestLoopThread : public PlatformThread::Delegate {
118  public:
TestLoopThread(OnceCallback<float ()> test_fn)119   explicit TestLoopThread(OnceCallback<float()> test_fn)
120       : test_fn_(std::move(test_fn)) {
121     PA_CHECK(PlatformThread::Create(0, this, &thread_handle_));
122   }
123 
Run()124   float Run() {
125     PlatformThread::Join(thread_handle_);
126     return laps_per_second_;
127   }
128 
ThreadMain()129   void ThreadMain() override { laps_per_second_ = std::move(test_fn_).Run(); }
130 
131   OnceCallback<float()> test_fn_;
132   PlatformThreadHandle thread_handle_;
133   std::atomic<float> laps_per_second_;
134 };
135 
DisplayResults(const std::string & story_name,float iterations_per_second)136 void DisplayResults(const std::string& story_name,
137                     float iterations_per_second) {
138   auto reporter = SetUpReporter(story_name);
139   reporter.AddResult(kMetricThroughput, iterations_per_second);
140   reporter.AddResult(kMetricTimePerAllocation,
141                      static_cast<size_t>(1e9 / iterations_per_second));
142 }
143 
144 class MemoryAllocationPerfNode {
145  public:
GetNext() const146   MemoryAllocationPerfNode* GetNext() const { return next_; }
SetNext(MemoryAllocationPerfNode * p)147   void SetNext(MemoryAllocationPerfNode* p) { next_ = p; }
FreeAll(MemoryAllocationPerfNode * first,Allocator * alloc)148   static void FreeAll(MemoryAllocationPerfNode* first, Allocator* alloc) {
149     MemoryAllocationPerfNode* cur = first;
150     while (cur != nullptr) {
151       MemoryAllocationPerfNode* next = cur->GetNext();
152       alloc->Free(cur);
153       cur = next;
154     }
155   }
156 
157  private:
158   MemoryAllocationPerfNode* next_ = nullptr;
159 };
160 
161 #if !defined(MEMORY_CONSTRAINED)
SingleBucket(Allocator * allocator)162 float SingleBucket(Allocator* allocator) {
163   auto* first =
164       reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
165   size_t allocated_memory = kAllocSize;
166 
167   LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
168   MemoryAllocationPerfNode* cur = first;
169   do {
170     auto* next = reinterpret_cast<MemoryAllocationPerfNode*>(
171         allocator->Alloc(kAllocSize));
172     CHECK_NE(next, nullptr);
173     cur->SetNext(next);
174     cur = next;
175     timer.NextLap();
176     allocated_memory += kAllocSize;
177     // With multiple threads, can get OOM otherwise.
178     if (allocated_memory > 200e6) {
179       cur->SetNext(nullptr);
180       MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
181       cur = first;
182       allocated_memory = kAllocSize;
183     }
184   } while (!timer.HasTimeLimitExpired());
185 
186   // next_ = nullptr only works if the class constructor is called (it's not
187   // called in this case because then we can allocate arbitrary-length
188   // payloads.)
189   cur->SetNext(nullptr);
190   MemoryAllocationPerfNode::FreeAll(first, allocator);
191 
192   return timer.LapsPerSecond();
193 }
194 #endif  // defined(MEMORY_CONSTRAINED)
195 
SingleBucketWithFree(Allocator * allocator)196 float SingleBucketWithFree(Allocator* allocator) {
197   // Allocate an initial element to make sure the bucket stays set up.
198   void* elem = allocator->Alloc(kAllocSize);
199 
200   LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
201   do {
202     void* cur = allocator->Alloc(kAllocSize);
203     CHECK_NE(cur, nullptr);
204     allocator->Free(cur);
205     timer.NextLap();
206   } while (!timer.HasTimeLimitExpired());
207 
208   allocator->Free(elem);
209   return timer.LapsPerSecond();
210 }
211 
212 #if !defined(MEMORY_CONSTRAINED)
MultiBucket(Allocator * allocator)213 float MultiBucket(Allocator* allocator) {
214   auto* first =
215       reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
216   MemoryAllocationPerfNode* cur = first;
217   size_t allocated_memory = kAllocSize;
218 
219   LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
220   do {
221     for (int i = 0; i < kMultiBucketRounds; i++) {
222       size_t size = kMultiBucketMinimumSize + (i * kMultiBucketIncrement);
223       auto* next =
224           reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(size));
225       CHECK_NE(next, nullptr);
226       cur->SetNext(next);
227       cur = next;
228       allocated_memory += size;
229     }
230 
231     // Can OOM with multiple threads.
232     if (allocated_memory > 100e6) {
233       cur->SetNext(nullptr);
234       MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
235       cur = first;
236       allocated_memory = kAllocSize;
237     }
238 
239     timer.NextLap();
240   } while (!timer.HasTimeLimitExpired());
241 
242   cur->SetNext(nullptr);
243   MemoryAllocationPerfNode::FreeAll(first, allocator);
244 
245   return timer.LapsPerSecond() * kMultiBucketRounds;
246 }
247 #endif  // defined(MEMORY_CONSTRAINED)
248 
MultiBucketWithFree(Allocator * allocator)249 float MultiBucketWithFree(Allocator* allocator) {
250   std::vector<void*> elems;
251   elems.reserve(kMultiBucketRounds);
252   // Do an initial round of allocation to make sure that the buckets stay in
253   // use (and aren't accidentally released back to the OS).
254   for (int i = 0; i < kMultiBucketRounds; i++) {
255     void* cur =
256         allocator->Alloc(kMultiBucketMinimumSize + (i * kMultiBucketIncrement));
257     CHECK_NE(cur, nullptr);
258     elems.push_back(cur);
259   }
260 
261   LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
262   do {
263     for (int i = 0; i < kMultiBucketRounds; i++) {
264       void* cur = allocator->Alloc(kMultiBucketMinimumSize +
265                                    (i * kMultiBucketIncrement));
266       CHECK_NE(cur, nullptr);
267       allocator->Free(cur);
268     }
269     timer.NextLap();
270   } while (!timer.HasTimeLimitExpired());
271 
272   for (void* ptr : elems) {
273     allocator->Free(ptr);
274   }
275 
276   return timer.LapsPerSecond() * kMultiBucketRounds;
277 }
278 
CreateAllocator(AllocatorType type)279 std::unique_ptr<Allocator> CreateAllocator(AllocatorType type) {
280   switch (type) {
281     case AllocatorType::kSystem:
282       return std::make_unique<SystemAllocator>();
283     case AllocatorType::kPartitionAlloc:
284       return std::make_unique<PartitionAllocator>();
285     case AllocatorType::kPartitionAllocWithThreadCache:
286       return std::make_unique<PartitionAllocatorWithThreadCache>();
287   }
288 }
289 
LogResults(int thread_count,AllocatorType alloc_type,uint64_t total_laps_per_second,uint64_t min_laps_per_second)290 void LogResults(int thread_count,
291                 AllocatorType alloc_type,
292                 uint64_t total_laps_per_second,
293                 uint64_t min_laps_per_second) {
294   LOG(INFO) << "RESULTSCSV: " << thread_count << ","
295             << static_cast<int>(alloc_type) << "," << total_laps_per_second
296             << "," << min_laps_per_second;
297 }
298 
RunTest(int thread_count,AllocatorType alloc_type,float (* test_fn)(Allocator *),const char * story_base_name)299 void RunTest(int thread_count,
300              AllocatorType alloc_type,
301              float (*test_fn)(Allocator*),
302              const char* story_base_name) {
303   auto alloc = CreateAllocator(alloc_type);
304 
305   std::vector<std::unique_ptr<TestLoopThread>> threads;
306   for (int i = 0; i < thread_count; ++i) {
307     threads.push_back(std::make_unique<TestLoopThread>(
308         BindOnce(test_fn, Unretained(alloc.get()))));
309   }
310 
311   uint64_t total_laps_per_second = 0;
312   uint64_t min_laps_per_second = std::numeric_limits<uint64_t>::max();
313   for (int i = 0; i < thread_count; ++i) {
314     uint64_t laps_per_second = threads[i]->Run();
315     min_laps_per_second = std::min(laps_per_second, min_laps_per_second);
316     total_laps_per_second += laps_per_second;
317   }
318 
319   char const* alloc_type_str;
320   switch (alloc_type) {
321     case AllocatorType::kSystem:
322       alloc_type_str = "System";
323       break;
324     case AllocatorType::kPartitionAlloc:
325       alloc_type_str = "PartitionAlloc";
326       break;
327     case AllocatorType::kPartitionAllocWithThreadCache:
328       alloc_type_str = "PartitionAllocWithThreadCache";
329       break;
330   }
331 
332   std::string name =
333       base::StringPrintf("%s%s_%s_%d", kMetricPrefixMemoryAllocation,
334                          story_base_name, alloc_type_str, thread_count);
335 
336   DisplayResults(name + "_total", total_laps_per_second);
337   DisplayResults(name + "_worst", min_laps_per_second);
338   LogResults(thread_count, alloc_type, total_laps_per_second,
339              min_laps_per_second);
340 }
341 
342 class MemoryAllocationPerfTest
343     : public testing::TestWithParam<std::tuple<int, AllocatorType>> {};
344 
345 // Only one partition with a thread cache: cannot use the thread cache when
346 // PartitionAlloc is malloc().
347 INSTANTIATE_TEST_SUITE_P(
348     ,
349     MemoryAllocationPerfTest,
350     ::testing::Combine(
351         ::testing::Values(1, 2, 3, 4),
352         ::testing::Values(AllocatorType::kSystem,
353                           AllocatorType::kPartitionAlloc
354 #if !BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
355                           ,
356                           AllocatorType::kPartitionAllocWithThreadCache
357 #endif
358                           )));
359 
360 // This test (and the other one below) allocates a large amount of memory, which
361 // can cause issues on Android.
362 #if !defined(MEMORY_CONSTRAINED)
TEST_P(MemoryAllocationPerfTest,SingleBucket)363 TEST_P(MemoryAllocationPerfTest, SingleBucket) {
364   auto params = GetParam();
365   RunTest(std::get<0>(params), std::get<1>(params), SingleBucket,
366           "SingleBucket");
367 }
368 #endif  // defined(MEMORY_CONSTRAINED)
369 
TEST_P(MemoryAllocationPerfTest,SingleBucketWithFree)370 TEST_P(MemoryAllocationPerfTest, SingleBucketWithFree) {
371   auto params = GetParam();
372   RunTest(std::get<0>(params), std::get<1>(params), SingleBucketWithFree,
373           "SingleBucketWithFree");
374 }
375 
376 #if !defined(MEMORY_CONSTRAINED)
TEST_P(MemoryAllocationPerfTest,MultiBucket)377 TEST_P(MemoryAllocationPerfTest, MultiBucket) {
378   auto params = GetParam();
379   RunTest(std::get<0>(params), std::get<1>(params), MultiBucket, "MultiBucket");
380 }
381 #endif  // defined(MEMORY_CONSTRAINED)
382 
TEST_P(MemoryAllocationPerfTest,MultiBucketWithFree)383 TEST_P(MemoryAllocationPerfTest, MultiBucketWithFree) {
384   auto params = GetParam();
385   RunTest(std::get<0>(params), std::get<1>(params), MultiBucketWithFree,
386           "MultiBucketWithFree");
387 }
388 
389 }  // namespace
390 
391 }  // namespace base
392