1 //===-- Benchmark memory specific tools -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // This file complements the `benchmark` header with memory specific tools and
10 // benchmarking facilities.
11 
12 #ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
13 #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
14 
15 #include "LibcBenchmark.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Support/Alignment.h"
19 #include <cstdint>
20 #include <random>
21 
22 namespace llvm {
23 namespace libc_benchmarks {
24 
25 //--------------
26 // Configuration
27 //--------------
28 
29 // Specifies a range of sizes to explore.
30 struct SizeRange {
31   uint32_t From = 0;  // Inclusive
32   uint32_t To = 1024; // Inclusive
33   uint32_t Step = 1;
34 };
35 
36 // An object to define how to test a memory function.
37 struct StudyConfiguration {
38   // The number of run for the study.
39   uint32_t Runs = 1;
40 
41   // The size of the buffers (1 buffer for memset but 2 for memcpy or memcmp).
42   // When testing small sizes, it's important to keep the total allocated
43   // size under the size of the L1 cache (usually 16 or 32KiB). The framework
44   // will also use 2KiB of additional L1 memory to store the function
45   // parameters.
46   uint32_t BufferSize = 8192;
47 
48   // The range of sizes to exercise.
49   SizeRange Size;
50 
51   MaybeAlign AddressAlignment; //  Unset : Use start of buffer which is at
52                                //         least cache line aligned)
53                                //     1 : Use random address,
54                                //    >1 : Use random address aligned to value.
55 
56   // The value to use for memset.
57   uint8_t MemsetValue = 0;
58 
59   // The mismatch position for memcmp.
60   uint32_t MemcmpMismatchAt = 0; //  0 : Buffer compare equal,
61                                  // >0 : Buffer compare different at byte N-1.
62 };
63 
64 //--------
65 // Results
66 //--------
67 
68 // The time to run one iteration of the function under test for the specified
69 // Size.
70 struct Measurement {
71   uint32_t Size = 0;
72   Duration Runtime = {};
73 };
74 
75 // The measurements for a specific function.
76 struct FunctionMeasurements {
77   std::string Name;
78   std::vector<Measurement> Measurements;
79 };
80 
81 // The root object containing all the data (configuration and measurements).
82 struct Study {
83   HostState Host;
84   BenchmarkOptions Options;
85   StudyConfiguration Configuration;
86   SmallVector<FunctionMeasurements, 4> Functions;
87 };
88 
89 // Provides an aligned, dynamically allocated buffer.
90 class AlignedBuffer {
91   char *const Buffer = nullptr;
92   size_t Size = 0;
93 
94 public:
95   static constexpr size_t Alignment = 1024;
96 
AlignedBuffer(size_t Size)97   explicit AlignedBuffer(size_t Size)
98       : Buffer(static_cast<char *>(aligned_alloc(1024, Size))), Size(Size) {}
~AlignedBuffer()99   ~AlignedBuffer() { free(Buffer); }
100 
101   inline char *operator+(size_t Index) { return Buffer + Index; }
102   inline const char *operator+(size_t Index) const { return Buffer + Index; }
103   inline char &operator[](size_t Index) { return Buffer[Index]; }
104   inline const char &operator[](size_t Index) const { return Buffer[Index]; }
begin()105   inline char *begin() { return Buffer; }
end()106   inline char *end() { return Buffer + Size; }
107 };
108 
109 // Implements the ParameterProvider abstraction needed by the `benchmark`
110 // function. This implementation makes sure that all parameters will fit into
111 // `StorageSize` bytes. The total memory accessed during benchmark should be
112 // less than the data L1 cache, that is the storage for the ParameterProvider
113 // and the memory buffers.
114 template <typename Context, size_t StorageSize = 8 * 1024>
115 class SmallParameterProvider {
116   using ParameterType = typename Context::ParameterType;
117   ByteConstrainedArray<ParameterType, StorageSize> Parameters;
118   size_t LastIterations;
119   Context &Ctx;
120 
121 public:
SmallParameterProvider(Context & C)122   explicit SmallParameterProvider(Context &C) : Ctx(C) {}
123   SmallParameterProvider(const SmallParameterProvider &) = delete;
124   SmallParameterProvider &operator=(const SmallParameterProvider &) = delete;
125 
126   // Useful to compute the histogram of the size parameter.
getLastBatch()127   CircularArrayRef<ParameterType> getLastBatch() const {
128     return cycle(Parameters, LastIterations);
129   }
130 
131   // Implements the interface needed by the `benchmark` function.
generateBatch(size_t Iterations)132   CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
133     LastIterations = Iterations;
134     Ctx.Randomize(Parameters);
135     return getLastBatch();
136   }
137 };
138 
139 // Helper to generate random buffer offsets that satisfy the configuration
140 // constraints.
141 class OffsetDistribution {
142   std::uniform_int_distribution<uint32_t> Distribution;
143   uint32_t Factor;
144 
145 public:
146   explicit OffsetDistribution(const StudyConfiguration &Conf);
147 
operator()148   template <class Generator> uint32_t operator()(Generator &G) {
149     return Distribution(G) * Factor;
150   }
151 };
152 
153 // Helper to generate random buffer offsets that satisfy the configuration
154 // constraints. It is specifically designed to benchmark `memcmp` functions
155 // where we may want the Nth byte to differ.
156 class MismatchOffsetDistribution {
157   std::uniform_int_distribution<size_t> MismatchIndexSelector;
158   llvm::SmallVector<uint32_t, 16> MismatchIndices;
159   const uint32_t MismatchAt;
160 
161 public:
162   explicit MismatchOffsetDistribution(const StudyConfiguration &Conf);
163 
164   explicit operator bool() const { return !MismatchIndices.empty(); }
165 
getMismatchIndices()166   const llvm::SmallVectorImpl<uint32_t> &getMismatchIndices() const {
167     return MismatchIndices;
168   }
169 
operator()170   template <class Generator> uint32_t operator()(Generator &G, uint32_t Size) {
171     const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)];
172     // We need to position the offset so that a mismatch occurs at MismatchAt.
173     if (Size >= MismatchAt)
174       return MismatchIndex - MismatchAt;
175     // Size is too small to trigger the mismatch.
176     return MismatchIndex - Size - 1;
177   }
178 };
179 
180 } // namespace libc_benchmarks
181 } // namespace llvm
182 
183 #endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
184