1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 // Overview
6 // --------
7 // This file measures the speed of various implementations of C++ and Rust
8 // collections (hash tables, etc.) used within the codebase. There are a small
9 // number of benchmarks for each collection type; each benchmark tests certain
10 // operations (insertion, lookup, iteration, etc.) More benchmarks could easily
11 // be envisioned, but this small number is enough to characterize the major
12 // differences between implementations, while keeping the file size and
13 // complexity low.
14 //
15 // Details
16 // -------
17 // The file uses `MOZ_GTEST_BENCH_F` so that results are integrated into
18 // PerfHerder. It is also designed so that individual test benchmarks can be
19 // run under a profiler.
20 //
21 // The C++ code uses `MOZ_RELEASE_ASSERT` extensively to check values and
22 // ensure operations aren't optimized away by the compiler. The Rust code uses
23 // `assert!()`. These should be roughly equivalent, but aren't guaranteed to be
24 // the same. As a result, the intra-C++ comparisons should be reliable, and the
25 // intra-Rust comparisons should be reliable, but the C++ vs. Rust comparisons
26 // may be less reliable.
27 //
28 // Note that the Rust implementations run very slowly without --enable-release.
29 //
30 // Profiling
31 // ---------
32 // If you want to measure a particular implementation under a profiler such as
33 // Callgrind, do something like this:
34 //
35 //   MOZ_RUN_GTEST=1 GTEST_FILTER='*BenchCollections*$IMPL*'
36 //       valgrind --tool=callgrind --callgrind-out-file=clgout
37 //       $OBJDIR/dist/bin/firefox -unittest
38 //   callgrind_annotate --auto=yes clgout > clgann
39 //
40 // where $IMPL is part of an implementation name in a test (e.g. "PLDHash",
41 // "MozHash") and $OBJDIR is an objdir containing a --enable-release build.
42 //
43 // Note that multiple processes are spawned, so `clgout` gets overwritten
44 // multiple times, but the last process to write its profiling data to file is
45 // the one of interest. (Alternatively, use --callgrind-out-file=clgout.%p to
46 // get separate output files for each process, with a PID suffix.)
47 
48 #include "gtest/gtest.h"
49 #include "gtest/MozGTestBench.h"  // For MOZ_GTEST_BENCH
50 #include "mozilla/AllocPolicy.h"
51 #include "mozilla/ArrayUtils.h"
52 #include "mozilla/HashFunctions.h"
53 #include "mozilla/HashTable.h"
54 #include "mozilla/StaticMutex.h"
55 #include "mozilla/TimeStamp.h"
56 #include "PLDHashTable.h"
57 #include <unordered_set>
58 
59 using namespace mozilla;
60 
61 // This function gives a pseudo-random sequence with the following properties:
62 // - Deterministic and platform-independent.
63 // - No duplicates in the first VALS_LEN results, which is useful for ensuring
64 //   the tables get to a particular size, and also for guaranteeing lookups
65 //   that fail.
MyRand()66 static uintptr_t MyRand() {
67   static uintptr_t s = 0;
68   s = s * 1103515245 + 12345;
69   return s;
70 }
71 
72 // Keep this in sync with Params in bench.rs.
73 struct Params {
74   const char* mConfigName;
75   size_t mNumInserts;            // Insert this many unique keys
76   size_t mNumSuccessfulLookups;  // Does mNumInserts lookups each time
77   size_t mNumFailingLookups;     // Does mNumInserts lookups each time
78   size_t mNumIterations;         // Iterates the full table each time
79   bool mRemoveInserts;           // Remove all entries at end?
80 };
81 
82 // We don't use std::unordered_{set,map}, but it's an interesting thing to
83 // benchmark against.
84 //
85 // Keep this in sync with all the other Bench_*() functions.
Bench_Cpp_unordered_set(const Params * aParams,void ** aVals,size_t aLen)86 static void Bench_Cpp_unordered_set(const Params* aParams, void** aVals,
87                                     size_t aLen) {
88   std::unordered_set<void*> hs;
89 
90   for (size_t j = 0; j < aParams->mNumInserts; j++) {
91     hs.insert(aVals[j]);
92   }
93 
94   for (size_t i = 0; i < aParams->mNumSuccessfulLookups; i++) {
95     for (size_t j = 0; j < aParams->mNumInserts; j++) {
96       MOZ_RELEASE_ASSERT(hs.find(aVals[j]) != hs.end());
97     }
98   }
99 
100   for (size_t i = 0; i < aParams->mNumFailingLookups; i++) {
101     for (size_t j = aParams->mNumInserts; j < aParams->mNumInserts * 2; j++) {
102       MOZ_RELEASE_ASSERT(hs.find(aVals[j]) == hs.end());
103     }
104   }
105 
106   for (size_t i = 0; i < aParams->mNumIterations; i++) {
107     size_t n = 0;
108     for (const auto& elem : hs) {
109       (void)elem;
110       n++;
111     }
112     MOZ_RELEASE_ASSERT(aParams->mNumInserts == n);
113     MOZ_RELEASE_ASSERT(hs.size() == n);
114   }
115 
116   if (aParams->mRemoveInserts) {
117     for (size_t j = 0; j < aParams->mNumInserts; j++) {
118       MOZ_RELEASE_ASSERT(hs.erase(aVals[j]) == 1);
119     }
120     MOZ_RELEASE_ASSERT(hs.size() == 0);
121   } else {
122     MOZ_RELEASE_ASSERT(hs.size() == aParams->mNumInserts);
123   }
124 }
125 
126 // Keep this in sync with all the other Bench_*() functions.
Bench_Cpp_PLDHashTable(const Params * aParams,void ** aVals,size_t aLen)127 static void Bench_Cpp_PLDHashTable(const Params* aParams, void** aVals,
128                                    size_t aLen) {
129   PLDHashTable hs(PLDHashTable::StubOps(), sizeof(PLDHashEntryStub));
130 
131   for (size_t j = 0; j < aParams->mNumInserts; j++) {
132     auto entry = static_cast<PLDHashEntryStub*>(hs.Add(aVals[j]));
133     MOZ_RELEASE_ASSERT(!entry->key);
134     entry->key = aVals[j];
135   }
136 
137   for (size_t i = 0; i < aParams->mNumSuccessfulLookups; i++) {
138     for (size_t j = 0; j < aParams->mNumInserts; j++) {
139       MOZ_RELEASE_ASSERT(hs.Search(aVals[j]));
140     }
141   }
142 
143   for (size_t i = 0; i < aParams->mNumFailingLookups; i++) {
144     for (size_t j = aParams->mNumInserts; j < aParams->mNumInserts * 2; j++) {
145       MOZ_RELEASE_ASSERT(!hs.Search(aVals[j]));
146     }
147   }
148 
149   for (size_t i = 0; i < aParams->mNumIterations; i++) {
150     size_t n = 0;
151     for (auto iter = hs.Iter(); !iter.Done(); iter.Next()) {
152       n++;
153     }
154     MOZ_RELEASE_ASSERT(aParams->mNumInserts == n);
155     MOZ_RELEASE_ASSERT(hs.EntryCount() == n);
156   }
157 
158   if (aParams->mRemoveInserts) {
159     for (size_t j = 0; j < aParams->mNumInserts; j++) {
160       hs.Remove(aVals[j]);
161     }
162     MOZ_RELEASE_ASSERT(hs.EntryCount() == 0);
163   } else {
164     MOZ_RELEASE_ASSERT(hs.EntryCount() == aParams->mNumInserts);
165   }
166 }
167 
168 // Keep this in sync with all the other Bench_*() functions.
Bench_Cpp_MozHashSet(const Params * aParams,void ** aVals,size_t aLen)169 static void Bench_Cpp_MozHashSet(const Params* aParams, void** aVals,
170                                  size_t aLen) {
171   mozilla::HashSet<void*, mozilla::DefaultHasher<void*>, MallocAllocPolicy> hs;
172 
173   for (size_t j = 0; j < aParams->mNumInserts; j++) {
174     MOZ_RELEASE_ASSERT(hs.put(aVals[j]));
175   }
176 
177   for (size_t i = 0; i < aParams->mNumSuccessfulLookups; i++) {
178     for (size_t j = 0; j < aParams->mNumInserts; j++) {
179       MOZ_RELEASE_ASSERT(hs.has(aVals[j]));
180     }
181   }
182 
183   for (size_t i = 0; i < aParams->mNumFailingLookups; i++) {
184     for (size_t j = aParams->mNumInserts; j < aParams->mNumInserts * 2; j++) {
185       MOZ_RELEASE_ASSERT(!hs.has(aVals[j]));
186     }
187   }
188 
189   for (size_t i = 0; i < aParams->mNumIterations; i++) {
190     size_t n = 0;
191     for (auto iter = hs.iter(); !iter.done(); iter.next()) {
192       n++;
193     }
194     MOZ_RELEASE_ASSERT(aParams->mNumInserts == n);
195     MOZ_RELEASE_ASSERT(hs.count() == n);
196   }
197 
198   if (aParams->mRemoveInserts) {
199     for (size_t j = 0; j < aParams->mNumInserts; j++) {
200       hs.remove(aVals[j]);
201     }
202     MOZ_RELEASE_ASSERT(hs.count() == 0);
203   } else {
204     MOZ_RELEASE_ASSERT(hs.count() == aParams->mNumInserts);
205   }
206 }
207 
208 extern "C" {
209 void Bench_Rust_HashSet(const Params* params, void** aVals, size_t aLen);
210 void Bench_Rust_FnvHashSet(const Params* params, void** aVals, size_t aLen);
211 void Bench_Rust_FxHashSet(const Params* params, void** aVals, size_t aLen);
212 }
213 
214 static const size_t VALS_LEN = 131072;
215 
216 // Each benchmark measures a different aspect of performance.
217 // Note that no "Inserts" value can exceed VALS_LEN.
218 // Also, if any failing lookups are done, Inserts must be <= VALS_LEN/2.
219 const Params gParamsList[] = {
220     // clang-format off
221   //                           Successful Failing              Remove
222   //                 Inserts   lookups    lookups  Iterations  inserts
223   { "succ_lookups",  1024,     5000,      0,       0,          false },
224   { "fail_lookups",  1024,     0,         5000,    0,          false },
225   { "insert_remove", VALS_LEN, 0,         0,       0,          true  },
226   { "iterate",       1024,     0,         0,       5000,       false },
227     // clang-format on
228 };
229 
230 class BenchCollections : public ::testing::Test {
231  protected:
SetUp()232   void SetUp() override {
233     StaticMutexAutoLock lock(sValsMutex);
234 
235     if (!sVals) {
236       sVals = (void**)malloc(VALS_LEN * sizeof(void*));
237       for (size_t i = 0; i < VALS_LEN; i++) {
238         // This leaves the high 32 bits zero on 64-bit platforms, but that
239         // should still be enough randomness to get typical behaviour.
240         sVals[i] = reinterpret_cast<void*>(uintptr_t(MyRand()));
241       }
242     }
243 
244     printf("\n");
245     for (size_t i = 0; i < ArrayLength(gParamsList); i++) {
246       const Params* params = &gParamsList[i];
247       printf("%14s", params->mConfigName);
248     }
249     printf("%14s\n", "total");
250   }
251 
252  public:
BenchImpl(void (* aBench)(const Params *,void **,size_t))253   void BenchImpl(void (*aBench)(const Params*, void**, size_t)) {
254     StaticMutexAutoLock lock(sValsMutex);
255 
256     double total = 0;
257     for (size_t i = 0; i < ArrayLength(gParamsList); i++) {
258       const Params* params = &gParamsList[i];
259       TimeStamp t1 = TimeStamp::Now();
260       aBench(params, sVals, VALS_LEN);
261       TimeStamp t2 = TimeStamp::Now();
262       double t = (t2 - t1).ToMilliseconds();
263       printf("%11.1f ms", t);
264       total += t;
265     }
266     printf("%11.1f ms\n", total);
267   }
268 
269  private:
270   // Random values used in the benchmarks.
271   static void** sVals;
272 
273   // A mutex that protects all benchmark operations, ensuring that two
274   // benchmarks never run concurrently.
275   static StaticMutex sValsMutex;
276 };
277 
278 void** BenchCollections::sVals;
279 StaticMutex BenchCollections::sValsMutex;
280 
281 MOZ_GTEST_BENCH_F(BenchCollections, unordered_set,
__anon761682940102null282                   [this] { BenchImpl(Bench_Cpp_unordered_set); });
283 
284 MOZ_GTEST_BENCH_F(BenchCollections, PLDHash,
__anon761682940202null285                   [this] { BenchImpl(Bench_Cpp_PLDHashTable); });
286 
287 MOZ_GTEST_BENCH_F(BenchCollections, MozHash,
__anon761682940302null288                   [this] { BenchImpl(Bench_Cpp_MozHashSet); });
289 
290 MOZ_GTEST_BENCH_F(BenchCollections, RustHash,
__anon761682940402null291                   [this] { BenchImpl(Bench_Rust_HashSet); });
292 
293 MOZ_GTEST_BENCH_F(BenchCollections, RustFnvHash,
__anon761682940502null294                   [this] { BenchImpl(Bench_Rust_FnvHashSet); });
295 
296 MOZ_GTEST_BENCH_F(BenchCollections, RustFxHash,
__anon761682940602null297                   [this] { BenchImpl(Bench_Rust_FxHashSet); });
298