1 //===-- Benchmark function --------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // This file mainly defines a `Benchmark` function.
10 //
11 // The benchmarking process is as follows:
12 // - We start by measuring the time it takes to run the function
13 // `InitialIterations` times. This is called a Sample. From this we can derive
14 // the time it took to run a single iteration.
15 //
16 // - We repeat the previous step with a greater number of iterations to lower
17 // the impact of the measurement. We can derive a more precise estimation of the
18 // runtime for a single iteration.
19 //
20 // - Each sample gives a more accurate estimation of the runtime for a single
21 // iteration but also takes more time to run. We stop the process when:
22 //   * The measure stabilize under a certain precision (Epsilon),
23 //   * The overall benchmarking time is greater than MaxDuration,
24 //   * The overall sample count is greater than MaxSamples,
25 //   * The last sample used more than MaxIterations iterations.
26 //
27 // - We also makes sure that the benchmark doesn't run for a too short period of
28 // time by defining MinDuration and MinSamples.
29 
30 #ifndef LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
31 #define LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
32 
33 #include "benchmark/benchmark.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/Optional.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include <array>
38 #include <chrono>
39 #include <cmath>
40 #include <cstdint>
41 
42 namespace llvm {
43 namespace libc_benchmarks {
44 
45 using Duration = std::chrono::duration<double>;
46 
47 enum class BenchmarkLog {
48   None, // Don't keep the internal state of the benchmark.
49   Last, // Keep only the last batch.
50   Full  // Keep all iterations states, useful for testing or debugging.
51 };
52 
53 // An object to configure the benchmark stopping conditions.
54 // See documentation at the beginning of the file for the overall algorithm and
55 // meaning of each field.
56 struct BenchmarkOptions {
57   // The minimum time for which the benchmark is running.
58   Duration MinDuration = std::chrono::seconds(0);
59   // The maximum time for which the benchmark is running.
60   Duration MaxDuration = std::chrono::seconds(10);
61   // The number of iterations in the first sample.
62   uint32_t InitialIterations = 1;
63   // The maximum number of iterations for any given sample.
64   uint32_t MaxIterations = 10000000;
65   // The minimum number of samples.
66   uint32_t MinSamples = 4;
67   // The maximum number of samples.
68   uint32_t MaxSamples = 1000;
69   // The benchmark will stop if the relative difference between the current and
70   // the last estimation is less than epsilon. This is 1% by default.
71   double Epsilon = 0.01;
72   // The number of iterations grows exponentially between each sample.
73   // Must be greater or equal to 1.
74   double ScalingFactor = 1.4;
75   BenchmarkLog Log = BenchmarkLog::None;
76 };
77 
78 // The state of a benchmark.
79 enum class BenchmarkStatus {
80   Running,
81   MaxDurationReached,
82   MaxIterationsReached,
83   MaxSamplesReached,
84   PrecisionReached,
85 };
86 
87 // The internal state of the benchmark, useful to debug, test or report
88 // statistics.
89 struct BenchmarkState {
90   size_t LastSampleIterations;
91   Duration LastBatchElapsed;
92   BenchmarkStatus CurrentStatus;
93   Duration CurrentBestGuess; // The time estimation for a single run of `foo`.
94   double ChangeRatio; // The change in time estimation between previous and
95                       // current samples.
96 };
97 
98 // A lightweight result for a benchmark.
99 struct BenchmarkResult {
100   BenchmarkStatus TerminationStatus = BenchmarkStatus::Running;
101   Duration BestGuess = {};
102   llvm::Optional<llvm::SmallVector<BenchmarkState, 16>> MaybeBenchmarkLog;
103 };
104 
105 // Stores information about a cache in the host memory system.
106 struct CacheInfo {
107   std::string Type; //  e.g. "Instruction", "Data", "Unified".
108   int Level;        // 0 is closest to processing unit.
109   int Size;         // In bytes.
110   int NumSharing;   // The number of processing units (Hyper-Threading Thread)
111                     // with which this cache is shared.
112 };
113 
114 // Stores information about the host.
115 struct HostState {
116   std::string CpuName; // returns a string compatible with the -march option.
117   double CpuFrequency; // in Hertz.
118   std::vector<CacheInfo> Caches;
119 
120   static HostState get();
121 };
122 
123 namespace internal {
124 
125 struct Measurement {
126   size_t Iterations = 0;
127   Duration Elapsed = {};
128 };
129 
130 // Updates the estimation of the elapsed time for a single iteration.
131 class RefinableRuntimeEstimation {
132   Duration TotalTime = {};
133   size_t TotalIterations = 0;
134 
135 public:
update(const Measurement & M)136   Duration update(const Measurement &M) {
137     assert(M.Iterations > 0);
138     // Duration is encoded as a double (see definition).
139     // `TotalTime` and `M.Elapsed` are of the same magnitude so we don't expect
140     // loss of precision due to radically different scales.
141     TotalTime += M.Elapsed;
142     TotalIterations += M.Iterations;
143     return TotalTime / TotalIterations;
144   }
145 };
146 
147 // This class tracks the progression of the runtime estimation.
148 class RuntimeEstimationProgression {
149   RefinableRuntimeEstimation RRE;
150 
151 public:
152   Duration CurrentEstimation = {};
153 
154   // Returns the change ratio between our best guess so far and the one from the
155   // new measurement.
computeImprovement(const Measurement & M)156   double computeImprovement(const Measurement &M) {
157     const Duration NewEstimation = RRE.update(M);
158     const double Ratio = fabs(((CurrentEstimation / NewEstimation) - 1.0));
159     CurrentEstimation = NewEstimation;
160     return Ratio;
161   }
162 };
163 
164 } // namespace internal
165 
166 // Measures the runtime of `foo` until conditions defined by `Options` are met.
167 //
168 // To avoid measurement's imprecisions we measure batches of `foo`.
169 // The batch size is growing by `ScalingFactor` to minimize the effect of
170 // measuring.
171 //
172 // Note: The benchmark is not responsible for serializing the executions of
173 // `foo`. It is not suitable for measuring, very small & side effect free
174 // functions, as the processor is free to execute several executions in
175 // parallel.
176 //
177 // - Options: A set of parameters controlling the stopping conditions for the
178 //     benchmark.
179 // - foo: The function under test. It takes one value and returns one value.
180 //     The input value is used to randomize the execution of `foo` as part of a
181 //     batch to mitigate the effect of the branch predictor. Signature:
182 //     `ProductType foo(ParameterProvider::value_type value);`
183 //     The output value is a product of the execution of `foo` and prevents the
184 //     compiler from optimizing out foo's body.
185 // - ParameterProvider: An object responsible for providing a range of
186 //     `Iterations` values to use as input for `foo`. The `value_type` of the
187 //     returned container has to be compatible with `foo` argument.
188 //     Must implement one of:
189 //     `Container<ParameterType> generateBatch(size_t Iterations);`
190 //     `const Container<ParameterType>& generateBatch(size_t Iterations);`
191 // - Clock: An object providing the current time. Must implement:
192 //     `std::chrono::time_point now();`
193 template <typename Function, typename ParameterProvider,
194           typename BenchmarkClock = const std::chrono::high_resolution_clock>
195 BenchmarkResult benchmark(const BenchmarkOptions &Options,
196                           ParameterProvider &PP, Function foo,
197                           BenchmarkClock &Clock = BenchmarkClock()) {
198   BenchmarkResult Result;
199   internal::RuntimeEstimationProgression REP;
200   Duration TotalBenchmarkDuration = {};
201   size_t Iterations = std::max(Options.InitialIterations, uint32_t(1));
202   size_t Samples = 0;
203   if (Options.ScalingFactor < 1.0)
204     report_fatal_error("ScalingFactor should be >= 1");
205   if (Options.Log != BenchmarkLog::None)
206     Result.MaybeBenchmarkLog.emplace();
207   for (;;) {
208     // Request a new Batch of size `Iterations`.
209     const auto &Batch = PP.generateBatch(Iterations);
210 
211     // Measuring this Batch.
212     const auto StartTime = Clock.now();
213     for (const auto Parameter : Batch) {
214       const auto Production = foo(Parameter);
215       benchmark::DoNotOptimize(Production);
216     }
217     const auto EndTime = Clock.now();
218     const Duration Elapsed = EndTime - StartTime;
219 
220     // Updating statistics.
221     ++Samples;
222     TotalBenchmarkDuration += Elapsed;
223     const double ChangeRatio = REP.computeImprovement({Iterations, Elapsed});
224     Result.BestGuess = REP.CurrentEstimation;
225 
226     // Stopping condition.
227     if (TotalBenchmarkDuration >= Options.MinDuration &&
228         Samples >= Options.MinSamples && ChangeRatio < Options.Epsilon)
229       Result.TerminationStatus = BenchmarkStatus::PrecisionReached;
230     else if (Samples >= Options.MaxSamples)
231       Result.TerminationStatus = BenchmarkStatus::MaxSamplesReached;
232     else if (TotalBenchmarkDuration >= Options.MaxDuration)
233       Result.TerminationStatus = BenchmarkStatus::MaxDurationReached;
234     else if (Iterations >= Options.MaxIterations)
235       Result.TerminationStatus = BenchmarkStatus::MaxIterationsReached;
236 
237     if (Result.MaybeBenchmarkLog) {
238       auto &BenchmarkLog = *Result.MaybeBenchmarkLog;
239       if (Options.Log == BenchmarkLog::Last && !BenchmarkLog.empty())
240         BenchmarkLog.pop_back();
241       BenchmarkState BS;
242       BS.LastSampleIterations = Iterations;
243       BS.LastBatchElapsed = Elapsed;
244       BS.CurrentStatus = Result.TerminationStatus;
245       BS.CurrentBestGuess = Result.BestGuess;
246       BS.ChangeRatio = ChangeRatio;
247       BenchmarkLog.push_back(BS);
248     }
249 
250     if (Result.TerminationStatus != BenchmarkStatus::Running)
251       return Result;
252 
253     if (Options.ScalingFactor > 1 &&
254         Iterations * Options.ScalingFactor == Iterations)
255       report_fatal_error(
256           "`Iterations *= ScalingFactor` is idempotent, increase ScalingFactor "
257           "or InitialIterations.");
258 
259     Iterations *= Options.ScalingFactor;
260   }
261 }
262 
263 // Interprets `Array` as a circular buffer of `Size` elements.
264 template <typename T> class CircularArrayRef {
265   llvm::ArrayRef<T> Array;
266   size_t Size;
267 
268 public:
269   using value_type = T;
270   using reference = T &;
271   using const_reference = const T &;
272   using difference_type = ssize_t;
273   using size_type = size_t;
274 
275   class const_iterator
276       : public std::iterator<std::input_iterator_tag, T, ssize_t> {
277     llvm::ArrayRef<T> Array;
278     size_t Index;
279     size_t Offset;
280 
281   public:
282     explicit const_iterator(llvm::ArrayRef<T> Array, size_t Index = 0)
Array(Array)283         : Array(Array), Index(Index), Offset(Index % Array.size()) {}
284     const_iterator &operator++() {
285       ++Index;
286       ++Offset;
287       if (Offset == Array.size())
288         Offset = 0;
289       return *this;
290     }
291     bool operator==(const_iterator Other) const { return Index == Other.Index; }
292     bool operator!=(const_iterator Other) const { return !(*this == Other); }
293     const T &operator*() const { return Array[Offset]; }
294   };
295 
CircularArrayRef(llvm::ArrayRef<T> Array,size_t Size)296   CircularArrayRef(llvm::ArrayRef<T> Array, size_t Size)
297       : Array(Array), Size(Size) {
298     assert(Array.size() > 0);
299   }
300 
begin()301   const_iterator begin() const { return const_iterator(Array); }
end()302   const_iterator end() const { return const_iterator(Array, Size); }
303 };
304 
305 // A convenient helper to produce a CircularArrayRef from an ArrayRef.
306 template <typename T>
cycle(llvm::ArrayRef<T> Array,size_t Size)307 CircularArrayRef<T> cycle(llvm::ArrayRef<T> Array, size_t Size) {
308   return {Array, Size};
309 }
310 
311 // Creates an std::array which storage size is constrained under `Bytes`.
312 template <typename T, size_t Bytes>
313 using ByteConstrainedArray = std::array<T, Bytes / sizeof(T)>;
314 
315 // A convenient helper to produce a CircularArrayRef from a
316 // ByteConstrainedArray.
317 template <typename T, size_t N>
cycle(const std::array<T,N> & Container,size_t Size)318 CircularArrayRef<T> cycle(const std::array<T, N> &Container, size_t Size) {
319   return {llvm::ArrayRef<T>(Container.cbegin(), Container.cend()), Size};
320 }
321 
322 // Makes sure the binary was compiled in release mode and that frequency
323 // governor is set on performance.
324 void checkRequirements();
325 
326 } // namespace libc_benchmarks
327 } // namespace llvm
328 
329 #endif // LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
330