1 //============================================================================
2 // Copyright (c) Kitware, Inc.
3 // All rights reserved.
4 // See LICENSE.txt for details.
5 //
6 // This software is distributed WITHOUT ANY WARRANTY; without even
7 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8 // PURPOSE. See the above copyright notice for more information.
9 //============================================================================
10
11 #ifndef vtk_m_benchmarking_Benchmarker_h
12 #define vtk_m_benchmarking_Benchmarker_h
13
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>

#include <vtkm/cont/testing/Testing.h>

#include <vtkm/internal/brigand.hpp>

#include <benchmark/benchmark.h>

#include <ostream>
#include <sstream>
#include <string>
#include <vector>
24
25 /// \file Benchmarker.h
26 /// \brief Benchmarking utilities
27 ///
28 /// VTK-m's benchmarking framework is built on top of Google Benchmark.
29 ///
30 /// A benchmark is now a single function, which is passed to a macro:
31 ///
32 /// ```
33 /// void MyBenchmark(::benchmark::State& state)
34 /// {
35 /// MyClass someClass;
36 ///
37 /// // Optional: Add a descriptive label with additional benchmark details:
38 /// state.SetLabel("Blah blah blah.");
39 ///
40 /// // Must use a vtkm timer to properly capture eg. CUDA execution times.
41 /// vtkm::cont::Timer timer;
42 /// for (auto _ : state)
43 /// {
44 /// someClass.Reset();
45 ///
46 /// timer.Start();
47 /// someClass.DoWork();
48 /// timer.Stop();
49 ///
50 /// state.SetIterationTime(timer.GetElapsedTime());
51 /// }
52 ///
53 /// // Optional: Report items and/or bytes processed per iteration in output:
54 /// state.SetItemsProcessed(state.iterations() * someClass.GetNumberOfItems());
55 /// state.SetBytesProcessed(state.iterations() * someClass.GetNumberOfBytes());
/// }
58 /// VTKM_BENCHMARK(MyBenchmark);
59 /// ```
60 ///
61 /// Google benchmark also makes it easy to implement parameter sweep benchmarks:
62 ///
63 /// ```
64 /// void MyParameterSweep(::benchmark::State& state)
65 /// {
66 /// // The current value in the sweep:
67 /// const vtkm::Id currentValue = state.range(0);
68 ///
69 /// MyClass someClass;
70 /// someClass.SetSomeParameter(currentValue);
71 ///
72 /// vtkm::cont::Timer timer;
73 /// for (auto _ : state)
74 /// {
75 /// someClass.Reset();
76 ///
77 /// timer.Start();
78 /// someClass.DoWork();
79 /// timer.Stop();
80 ///
81 /// state.SetIterationTime(timer.GetElapsedTime());
82 /// }
83 /// }
/// VTKM_BENCHMARK_OPTS(MyParameterSweep, ->ArgName("Param")->Range(32, 1024 * 1024));
85 /// ```
86 ///
87 /// will generate and launch several benchmarks, exploring the parameter space of
88 /// `SetSomeParameter` between the values of 32 and (1024*1024). The chain of
89 /// functions calls in the second argument is applied to an instance of
90 /// ::benchmark::internal::Benchmark. See Google Benchmark's documentation for
91 /// more details.
92 ///
93 /// For more complex benchmark configurations, the VTKM_BENCHMARK_APPLY macro
94 /// accepts a function with the signature
95 /// `void Func(::benchmark::internal::Benchmark*)` that may be used to generate
96 /// more complex configurations.
97 ///
98 /// To instantiate a templated benchmark across a list of types, the
99 /// VTKM_BENCHMARK_TEMPLATE* macros take a vtkm::List of types as an additional
100 /// parameter. The templated benchmark function will be instantiated and called
101 /// for each type in the list:
102 ///
103 /// ```
104 /// template <typename T>
105 /// void MyBenchmark(::benchmark::State& state)
106 /// {
107 /// MyClass<T> someClass;
108 ///
109 /// // Must use a vtkm timer to properly capture eg. CUDA execution times.
110 /// vtkm::cont::Timer timer;
111 /// for (auto _ : state)
112 /// {
113 /// someClass.Reset();
114 ///
115 /// timer.Start();
116 /// someClass.DoWork();
117 /// timer.Stop();
118 ///
119 /// state.SetIterationTime(timer.GetElapsedTime());
120 /// }
/// }
/// VTKM_BENCHMARK_TEMPLATES(MyBenchmark, vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
124 /// ```
125 ///
126 /// The benchmarks are executed by calling the `VTKM_EXECUTE_BENCHMARKS(argc, argv)`
127 /// macro from `main`. There is also a `VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, some_string)`
128 /// macro that appends the contents of `some_string` to the Google Benchmark preamble.
129 ///
130 /// If a benchmark is not compatible with some configuration, it may call
131 /// `state.SkipWithError("Error message");` on the `::benchmark::State` object and return. This is
132 /// useful, for instance in the filter tests when the input is not compatible with the filter.
133 ///
134 /// When launching a benchmark executable, the following options are supported by Google Benchmark:
135 ///
136 /// - `--benchmark_list_tests`: List all available tests.
137 /// - `--benchmark_filter="[regex]"`: Only run benchmark with names that match `[regex]`.
138 /// - `--benchmark_filter="-[regex]"`: Only run benchmark with names that DON'T match `[regex]`.
139 /// - `--benchmark_min_time=[float]`: Make sure each benchmark repetition gathers `[float]` seconds
140 /// of data.
141 /// - `--benchmark_repetitions=[int]`: Run each benchmark `[int]` times and report aggregate statistics
142 /// (mean, stdev, etc). A "repetition" refers to a single execution of the benchmark function, not
143 /// an "iteration", which is a loop of the `for(auto _:state){...}` section.
144 /// - `--benchmark_report_aggregates_only="true|false"`: If true, only the aggregate statistics are
145 /// reported (affects both console and file output). Requires `--benchmark_repetitions` to be useful.
146 /// - `--benchmark_display_aggregates_only="true|false"`: If true, only the aggregate statistics are
147 /// printed to the terminal. Any file output will still contain all repetition info.
148 /// - `--benchmark_format="console|json|csv"`: Specify terminal output format: human readable
149 /// (`console`) or `csv`/`json` formats.
150 /// - `--benchmark_out_format="console|json|csv"`: Specify file output format: human readable
151 /// (`console`) or `csv`/`json` formats.
152 /// - `--benchmark_out=[filename]`: Specify output file.
153 /// - `--benchmark_color="true|false"`: Toggle color output in terminal when using `console` output.
154 /// - `--benchmark_counters_tabular="true|false"`: Print counter information (e.g. bytes/sec, items/sec)
155 /// in the table, rather than appending them as a label.
156 ///
157 /// For more information and examples of practical usage, take a look at the existing benchmarks in
158 /// vtk-m/benchmarking/.
159
160 /// \def VTKM_EXECUTE_BENCHMARKS(argc, argv)
161 ///
162 /// Run the benchmarks defined in the current file. Benchmarks may be filtered
163 /// and modified using the passed arguments; see the Google Benchmark documentation
164 /// for more details.
165 #define VTKM_EXECUTE_BENCHMARKS(argc, argv) vtkm::bench::detail::ExecuteBenchmarks(argc, argv)
166
167 /// \def VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble)
168 ///
169 /// Run the benchmarks defined in the current file. Benchmarks may be filtered
170 /// and modified using the passed arguments; see the Google Benchmark documentation
171 /// for more details. The `preamble` string may be used to supply additional
172 /// information that will be appended to the output's preamble.
173 #define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
174 vtkm::bench::detail::ExecuteBenchmarks(argc, argv, preamble)
175
176 /// \def VTKM_BENCHMARK(BenchFunc)
177 ///
178 /// Define a simple benchmark. A single benchmark will be generated that executes
179 /// `BenchFunc`. `BenchFunc` must have the signature:
180 ///
181 /// ```
182 /// void BenchFunc(::benchmark::State& state)
183 /// ```
184 #define VTKM_BENCHMARK(BenchFunc) \
185 BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond)
186
187 /// \def VTKM_BENCHMARK_OPTS(BenchFunc, Args)
188 ///
189 /// Similar to `VTKM_BENCHMARK`, but allows additional options to be specified
190 /// on the `::benchmark::internal::Benchmark` object. Example usage:
191 ///
192 /// ```
193 /// VTKM_BENCHMARK_OPTS(MyBenchmark, ->ArgName("MyParam")->Range(32, 1024*1024));
194 /// ```
195 ///
196 /// Note the similarity to the raw Google Benchmark usage of
197 /// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
198 /// the Google Benchmark documentation for more details on the available options.
199 #define VTKM_BENCHMARK_OPTS(BenchFunc, options) \
200 BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond) options
201
202 /// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
203 ///
204 /// Similar to `VTKM_BENCHMARK`, but allows advanced benchmark configuration
205 /// via a supplied ConfigFunc, similar to Google Benchmark's
206 /// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
207 /// signature:
208 ///
209 /// ```
210 /// void ConfigFunc(::benchmark::internal::Benchmark*);
211 /// ```
212 ///
213 /// See the Google Benchmark documentation for more details on the available options.
214 #define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
215 BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()->Unit(benchmark::kMillisecond)
216
217 /// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
218 ///
219 /// Define a family of benchmark that vary by template argument. A single
220 /// benchmark will be generated for each type in `TypeList` (a vtkm::List of
221 /// types) that executes `BenchFunc<T>`. `BenchFunc` must have the signature:
222 ///
223 /// ```
224 /// template <typename T>
225 /// void BenchFunc(::benchmark::State& state)
226 /// ```
227 #define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
228 VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, TypeList)
229
230 /// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Args, TypeList)
231 ///
232 /// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows additional options to be specified
233 /// on the `::benchmark::internal::Benchmark` object. Example usage:
234 ///
235 /// ```
236 /// VTKM_BENCHMARK_TEMPLATES_OPTS(MyBenchmark,
237 /// ->ArgName("MyParam")->Range(32, 1024*1024),
238 /// vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
239 /// ```
240 #define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList) \
241 VTKM_BENCHMARK_TEMPLATES_APPLY( \
242 BenchFunc, \
243 [](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); }, \
244 TypeList)
245
246 /// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
247 ///
248 /// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows advanced benchmark configuration
249 /// via a supplied ConfigFunc, similar to Google Benchmark's
250 /// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
251 /// signature:
252 ///
253 /// ```
254 /// void ConfigFunc(::benchmark::internal::Benchmark*);
255 /// ```
256 ///
257 /// See the Google Benchmark documentation for more details on the available options.
258 #define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList) \
259 namespace \
260 { /* A template function cannot be used as a template parameter, so wrap the function with \
261 * a template struct to get it into the GenerateTemplateBenchmarks class. */ \
262 template <typename... Ts> \
263 struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
264 { \
265 static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; } \
266 }; \
267 } /* end anon namespace */ \
268 int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks< \
269 brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>, \
270 TypeList>::Register(#BenchFunc, ApplyFunctor)
271
272 // Internal use only:
273 #define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
274 BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchFunc, __LINE__)
275
276 namespace vtkm
277 {
278 namespace bench
279 {
280 namespace detail
281 {
282
NullApply(::benchmark::internal::Benchmark *)283 static inline void NullApply(::benchmark::internal::Benchmark*) {}
284
285 /// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
286 /// instead.
287 // TypeLists could be expanded to compute cross products if we ever have that
288 // need.
289 template <typename BoundBench, typename TypeLists>
290 struct GenerateTemplateBenchmarks;
291
292 template <template <typename...> class BenchType, typename TypeList>
293 struct GenerateTemplateBenchmarks<brigand::bind<BenchType>, TypeList>
294 {
295 private:
296 template <typename T>
297 using MakeBenchType = BenchType<T>;
298
299 using Benchmarks = brigand::transform<TypeList, brigand::bind<MakeBenchType, brigand::_1>>;
300
301 template <typename ApplyFunctor>
302 struct RegisterImpl
303 {
304 std::string BenchName;
305 ApplyFunctor Apply;
306
307 template <typename P>
308 void operator()(brigand::type_<BenchType<P>>) const
309 {
310 std::ostringstream name;
311 name << this->BenchName << "<" << vtkm::testing::TypeName<P>::Name() << ">";
312 auto bm = ::benchmark::internal::RegisterBenchmarkInternal(
313 new ::benchmark::internal::FunctionBenchmark(name.str().c_str(),
314 BenchType<P>::GetFunction()));
315 this->Apply(bm);
316
317 // Always use manual time with vtkm::cont::Timer to capture CUDA times accurately.
318 bm->UseManualTime()->Unit(benchmark::kMillisecond);
319 }
320 };
321
322 public:
323 template <typename ApplyFunctor>
324 static int Register(const std::string& benchName, ApplyFunctor&& apply)
325 {
326 brigand::for_each<Benchmarks>(
327 RegisterImpl<ApplyFunctor>{ benchName, std::forward<ApplyFunctor>(apply) });
328 return 0;
329 }
330 };
331
332 class VTKmConsoleReporter : public ::benchmark::ConsoleReporter
333 {
334 std::string UserPreamble;
335
336 public:
337 VTKmConsoleReporter() = default;
338
339 explicit VTKmConsoleReporter(const std::string& preamble)
340 : UserPreamble{ preamble }
341 {
342 }
343
344 bool ReportContext(const Context& context) override
345 {
346 if (!::benchmark::ConsoleReporter::ReportContext(context))
347 {
348 return false;
349 }
350
351 // The rest of the preamble is printed to the error stream, so be consistent:
352 auto& out = this->GetErrorStream();
353
354 // Print list of devices:
355 out << "VTK-m Device State:\n";
356 vtkm::cont::GetRuntimeDeviceTracker().PrintSummary(out);
357 if (!this->UserPreamble.empty())
358 {
359 out << this->UserPreamble << "\n";
360 }
361 out.flush();
362
363 return true;
364 }
365 };
366
367 // Returns the number of executed benchmarks:
368 static inline vtkm::Id ExecuteBenchmarks(int& argc,
369 char* argv[],
370 const std::string& preamble = std::string{})
371 {
372 ::benchmark::Initialize(&argc, argv);
373 if (::benchmark::ReportUnrecognizedArguments(argc, argv))
374 {
375 return 1;
376 }
377
378 VTKmConsoleReporter reporter{ preamble };
379
380 vtkm::cont::Timer timer;
381 timer.Start();
382 std::size_t num = ::benchmark::RunSpecifiedBenchmarks(&reporter);
383 timer.Stop();
384
385 reporter.GetOutputStream().flush();
386 reporter.GetErrorStream().flush();
387
388 reporter.GetErrorStream() << "Ran " << num << " benchmarks in " << timer.GetElapsedTime()
389 << " seconds." << std::endl;
390
391 return static_cast<vtkm::Id>(num);
392 }
393
394 void InitializeArgs(int* argc, std::vector<char*>& args, vtkm::cont::InitializeOptions& opts)
395 {
396 bool isHelp = false;
397
398 // Inject --help
399 if (*argc == 1)
400 {
401 const char* help = "--help"; // We want it to be static
402 args.push_back(const_cast<char*>(help));
403 *argc = *argc + 1;
404 }
405
406 args.push_back(nullptr);
407
408 for (size_t i = 0; i < static_cast<size_t>(*argc); ++i)
409 {
410 auto opt_s = std::string(args[i]);
411 if (opt_s == "--help" || opt_s == "-help" || opt_s == "-h")
412 {
413 isHelp = true;
414 }
415 }
416
417 if (!isHelp)
418 {
419 return;
420 }
421
422 opts = vtkm::cont::InitializeOptions::None;
423 }
424 }
425 }
426 } // end namespace vtkm::bench::detail
427
428 #endif
429