1 //===-- Benchmark function --------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 // This file mainly defines a `Benchmark` function.
10 //
11 // The benchmarking process is as follows:
12 // - We start by measuring the time it takes to run the function
13 // `InitialIterations` times. This is called a Sample. From this we can derive
14 // the time it took to run a single iteration.
15 //
16 // - We repeat the previous step with a greater number of iterations to lower
17 // the impact of the measurement. We can derive a more precise estimation of the
18 // runtime for a single iteration.
19 //
20 // - Each sample gives a more accurate estimation of the runtime for a single
21 // iteration but also takes more time to run. We stop the process when:
22 // * The measure stabilize under a certain precision (Epsilon),
23 // * The overall benchmarking time is greater than MaxDuration,
24 // * The overall sample count is greater than MaxSamples,
25 // * The last sample used more than MaxIterations iterations.
26 //
27 // - We also makes sure that the benchmark doesn't run for a too short period of
28 // time by defining MinDuration and MinSamples.
29
30 #ifndef LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
31 #define LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
32
33 #include "benchmark/benchmark.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/Optional.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include <array>
38 #include <chrono>
39 #include <cstdint>
40
41 namespace llvm {
42 namespace libc_benchmarks {
43
44 using Duration = std::chrono::duration<double>;
45
46 enum class BenchmarkLog {
47 None, // Don't keep the internal state of the benchmark.
48 Last, // Keep only the last batch.
49 Full // Keep all iterations states, useful for testing or debugging.
50 };
51
52 // An object to configure the benchmark stopping conditions.
53 // See documentation at the beginning of the file for the overall algorithm and
54 // meaning of each field.
55 struct BenchmarkOptions {
56 // The minimum time for which the benchmark is running.
57 Duration MinDuration = std::chrono::seconds(0);
58 // The maximum time for which the benchmark is running.
59 Duration MaxDuration = std::chrono::seconds(10);
60 // The number of iterations in the first sample.
61 uint32_t InitialIterations = 1;
62 // The maximum number of iterations for any given sample.
63 uint32_t MaxIterations = 10000000;
64 // The minimum number of samples.
65 uint32_t MinSamples = 4;
66 // The maximum number of samples.
67 uint32_t MaxSamples = 1000;
68 // The benchmark will stop if the relative difference between the current and
69 // the last estimation is less than epsilon. This is 1% by default.
70 double Epsilon = 0.01;
71 // The number of iterations grows exponentially between each sample.
72 // Must be greater or equal to 1.
73 double ScalingFactor = 1.4;
74 BenchmarkLog Log = BenchmarkLog::None;
75 };
76
77 // The state of a benchmark.
78 enum class BenchmarkStatus {
79 Running,
80 MaxDurationReached,
81 MaxIterationsReached,
82 MaxSamplesReached,
83 PrecisionReached,
84 };
85
86 // The internal state of the benchmark, useful to debug, test or report
87 // statistics.
88 struct BenchmarkState {
89 size_t LastSampleIterations;
90 Duration LastBatchElapsed;
91 BenchmarkStatus CurrentStatus;
92 Duration CurrentBestGuess; // The time estimation for a single run of `foo`.
93 double ChangeRatio; // The change in time estimation between previous and
94 // current samples.
95 };
96
97 // A lightweight result for a benchmark.
98 struct BenchmarkResult {
99 BenchmarkStatus TerminationStatus = BenchmarkStatus::Running;
100 Duration BestGuess = {};
101 llvm::Optional<llvm::SmallVector<BenchmarkState, 16>> MaybeBenchmarkLog;
102 };
103
104 // Stores information about a cache in the host memory system.
105 struct CacheInfo {
106 std::string Type; // e.g. "Instruction", "Data", "Unified".
107 int Level; // 0 is closest to processing unit.
108 int Size; // In bytes.
109 int NumSharing; // The number of processing units (Hyper-Threading Thread)
110 // with which this cache is shared.
111 };
112
113 // Stores information about the host.
114 struct HostState {
115 std::string CpuName; // returns a string compatible with the -march option.
116 double CpuFrequency; // in Hertz.
117 std::vector<CacheInfo> Caches;
118
119 static HostState get();
120 };
121
122 namespace internal {
123
124 struct Measurement {
125 size_t Iterations = 0;
126 Duration Elapsed = {};
127 };
128
129 // Updates the estimation of the elapsed time for a single iteration.
130 class RefinableRuntimeEstimation {
131 Duration TotalTime = {};
132 size_t TotalIterations = 0;
133
134 public:
update(const Measurement & M)135 Duration update(const Measurement &M) {
136 assert(M.Iterations > 0);
137 // Duration is encoded as a double (see definition).
138 // `TotalTime` and `M.Elapsed` are of the same magnitude so we don't expect
139 // loss of precision due to radically different scales.
140 TotalTime += M.Elapsed;
141 TotalIterations += M.Iterations;
142 return TotalTime / TotalIterations;
143 }
144 };
145
146 // This class tracks the progression of the runtime estimation.
147 class RuntimeEstimationProgression {
148 RefinableRuntimeEstimation RRE;
149
150 public:
151 Duration CurrentEstimation = {};
152
153 // Returns the change ratio between our best guess so far and the one from the
154 // new measurement.
computeImprovement(const Measurement & M)155 double computeImprovement(const Measurement &M) {
156 const Duration NewEstimation = RRE.update(M);
157 const double Ratio = fabs(((CurrentEstimation / NewEstimation) - 1.0));
158 CurrentEstimation = NewEstimation;
159 return Ratio;
160 }
161 };
162
163 } // namespace internal
164
165 // Measures the runtime of `foo` until conditions defined by `Options` are met.
166 //
167 // To avoid measurement's imprecisions we measure batches of `foo`.
168 // The batch size is growing by `ScalingFactor` to minimize the effect of
169 // measuring.
170 //
171 // Note: The benchmark is not responsible for serializing the executions of
172 // `foo`. It is not suitable for measuring, very small & side effect free
173 // functions, as the processor is free to execute several executions in
174 // parallel.
175 //
176 // - Options: A set of parameters controlling the stopping conditions for the
177 // benchmark.
178 // - foo: The function under test. It takes one value and returns one value.
179 // The input value is used to randomize the execution of `foo` as part of a
180 // batch to mitigate the effect of the branch predictor. Signature:
181 // `ProductType foo(ParameterProvider::value_type value);`
182 // The output value is a product of the execution of `foo` and prevents the
183 // compiler from optimizing out foo's body.
184 // - ParameterProvider: An object responsible for providing a range of
185 // `Iterations` values to use as input for `foo`. The `value_type` of the
186 // returned container has to be compatible with `foo` argument.
187 // Must implement one of:
188 // `Container<ParameterType> generateBatch(size_t Iterations);`
189 // `const Container<ParameterType>& generateBatch(size_t Iterations);`
190 // - Clock: An object providing the current time. Must implement:
191 // `std::chrono::time_point now();`
192 template <typename Function, typename ParameterProvider,
193 typename BenchmarkClock = const std::chrono::high_resolution_clock>
194 BenchmarkResult benchmark(const BenchmarkOptions &Options,
195 ParameterProvider &PP, Function foo,
196 BenchmarkClock &Clock = BenchmarkClock()) {
197 BenchmarkResult Result;
198 internal::RuntimeEstimationProgression REP;
199 Duration TotalBenchmarkDuration = {};
200 size_t Iterations = std::max(Options.InitialIterations, uint32_t(1));
201 size_t Samples = 0;
202 if (Options.ScalingFactor < 1.0)
203 report_fatal_error("ScalingFactor should be >= 1");
204 if (Options.Log != BenchmarkLog::None)
205 Result.MaybeBenchmarkLog.emplace();
206 for (;;) {
207 // Request a new Batch of size `Iterations`.
208 const auto &Batch = PP.generateBatch(Iterations);
209
210 // Measuring this Batch.
211 const auto StartTime = Clock.now();
212 for (const auto Parameter : Batch) {
213 const auto Production = foo(Parameter);
214 benchmark::DoNotOptimize(Production);
215 }
216 const auto EndTime = Clock.now();
217 const Duration Elapsed = EndTime - StartTime;
218
219 // Updating statistics.
220 ++Samples;
221 TotalBenchmarkDuration += Elapsed;
222 const double ChangeRatio = REP.computeImprovement({Iterations, Elapsed});
223 Result.BestGuess = REP.CurrentEstimation;
224
225 // Stopping condition.
226 if (TotalBenchmarkDuration >= Options.MinDuration &&
227 Samples >= Options.MinSamples && ChangeRatio < Options.Epsilon)
228 Result.TerminationStatus = BenchmarkStatus::PrecisionReached;
229 else if (Samples >= Options.MaxSamples)
230 Result.TerminationStatus = BenchmarkStatus::MaxSamplesReached;
231 else if (TotalBenchmarkDuration >= Options.MaxDuration)
232 Result.TerminationStatus = BenchmarkStatus::MaxDurationReached;
233 else if (Iterations >= Options.MaxIterations)
234 Result.TerminationStatus = BenchmarkStatus::MaxIterationsReached;
235
236 if (Result.MaybeBenchmarkLog) {
237 auto &BenchmarkLog = *Result.MaybeBenchmarkLog;
238 if (Options.Log == BenchmarkLog::Last && !BenchmarkLog.empty())
239 BenchmarkLog.pop_back();
240 BenchmarkState BS;
241 BS.LastSampleIterations = Iterations;
242 BS.LastBatchElapsed = Elapsed;
243 BS.CurrentStatus = Result.TerminationStatus;
244 BS.CurrentBestGuess = Result.BestGuess;
245 BS.ChangeRatio = ChangeRatio;
246 BenchmarkLog.push_back(BS);
247 }
248
249 if (Result.TerminationStatus != BenchmarkStatus::Running)
250 return Result;
251
252 if (Options.ScalingFactor > 1 &&
253 Iterations * Options.ScalingFactor == Iterations)
254 report_fatal_error(
255 "`Iterations *= ScalingFactor` is idempotent, increase ScalingFactor "
256 "or InitialIterations.");
257
258 Iterations *= Options.ScalingFactor;
259 }
260 }
261
262 // Interprets `Array` as a circular buffer of `Size` elements.
263 template <typename T> class CircularArrayRef {
264 llvm::ArrayRef<T> Array;
265 size_t Size;
266
267 public:
268 using value_type = T;
269 using reference = T &;
270 using const_reference = const T &;
271 using difference_type = ssize_t;
272 using size_type = size_t;
273
274 class const_iterator
275 : public std::iterator<std::input_iterator_tag, T, ssize_t> {
276 llvm::ArrayRef<T> Array;
277 size_t Index;
278 size_t Offset;
279
280 public:
281 explicit const_iterator(llvm::ArrayRef<T> Array, size_t Index = 0)
Array(Array)282 : Array(Array), Index(Index), Offset(Index % Array.size()) {}
283 const_iterator &operator++() {
284 ++Index;
285 ++Offset;
286 if (Offset == Array.size())
287 Offset = 0;
288 return *this;
289 }
290 bool operator==(const_iterator Other) const { return Index == Other.Index; }
291 bool operator!=(const_iterator Other) const { return !(*this == Other); }
292 const T &operator*() const { return Array[Offset]; }
293 };
294
CircularArrayRef(llvm::ArrayRef<T> Array,size_t Size)295 CircularArrayRef(llvm::ArrayRef<T> Array, size_t Size)
296 : Array(Array), Size(Size) {
297 assert(Array.size() > 0);
298 }
299
begin()300 const_iterator begin() const { return const_iterator(Array); }
end()301 const_iterator end() const { return const_iterator(Array, Size); }
302 };
303
304 // A convenient helper to produce a CircularArrayRef from an ArrayRef.
305 template <typename T>
cycle(llvm::ArrayRef<T> Array,size_t Size)306 CircularArrayRef<T> cycle(llvm::ArrayRef<T> Array, size_t Size) {
307 return {Array, Size};
308 }
309
310 // Creates an std::array which storage size is constrained under `Bytes`.
311 template <typename T, size_t Bytes>
312 using ByteConstrainedArray = std::array<T, Bytes / sizeof(T)>;
313
314 // A convenient helper to produce a CircularArrayRef from a
315 // ByteConstrainedArray.
316 template <typename T, size_t N>
cycle(const std::array<T,N> & Container,size_t Size)317 CircularArrayRef<T> cycle(const std::array<T, N> &Container, size_t Size) {
318 return {llvm::ArrayRef<T>(Container.cbegin(), Container.cend()), Size};
319 }
320
321 // Makes sure the binary was compiled in release mode and that frequency
322 // governor is set on performance.
323 void checkRequirements();
324
325 } // namespace libc_benchmarks
326 } // namespace llvm
327
328 #endif // LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
329