1 //============================================================================
2 //  Copyright (c) Kitware, Inc.
3 //  All rights reserved.
4 //  See LICENSE.txt for details.
5 //
6 //  This software is distributed WITHOUT ANY WARRANTY; without even
7 //  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8 //  PURPOSE.  See the above copyright notice for more information.
9 //============================================================================
10 
11 #ifndef vtk_m_benchmarking_Benchmarker_h
12 #define vtk_m_benchmarking_Benchmarker_h
13 
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>

#include <vtkm/cont/testing/Testing.h>

#include <vtkm/internal/brigand.hpp>

#include <benchmark/benchmark.h>

#include <algorithm>
#include <cstddef>
#include <ostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
24 
25 /// \file Benchmarker.h
26 /// \brief Benchmarking utilities
27 ///
28 /// VTK-m's benchmarking framework is built on top of Google Benchmark.
29 ///
30 /// A benchmark is now a single function, which is passed to a macro:
31 ///
32 /// ```
33 /// void MyBenchmark(::benchmark::State& state)
34 /// {
35 ///   MyClass someClass;
36 ///
37 ///   // Optional: Add a descriptive label with additional benchmark details:
38 ///   state.SetLabel("Blah blah blah.");
39 ///
40 ///   // Must use a vtkm timer to properly capture eg. CUDA execution times.
41 ///   vtkm::cont::Timer timer;
42 ///   for (auto _ : state)
43 ///   {
44 ///     someClass.Reset();
45 ///
46 ///     timer.Start();
47 ///     someClass.DoWork();
48 ///     timer.Stop();
49 ///
50 ///     state.SetIterationTime(timer.GetElapsedTime());
51 ///   }
52 ///
53 ///   // Optional: Report items and/or bytes processed per iteration in output:
54 ///   state.SetItemsProcessed(state.iterations() * someClass.GetNumberOfItems());
55 ///   state.SetBytesProcessed(state.iterations() * someClass.GetNumberOfBytes());
/// }
58 /// VTKM_BENCHMARK(MyBenchmark);
59 /// ```
60 ///
61 /// Google benchmark also makes it easy to implement parameter sweep benchmarks:
62 ///
63 /// ```
64 /// void MyParameterSweep(::benchmark::State& state)
65 /// {
66 ///   // The current value in the sweep:
67 ///   const vtkm::Id currentValue = state.range(0);
68 ///
69 ///   MyClass someClass;
70 ///   someClass.SetSomeParameter(currentValue);
71 ///
72 ///   vtkm::cont::Timer timer;
73 ///   for (auto _ : state)
74 ///   {
75 ///     someClass.Reset();
76 ///
77 ///     timer.Start();
78 ///     someClass.DoWork();
79 ///     timer.Stop();
80 ///
81 ///     state.SetIterationTime(timer.GetElapsedTime());
82 ///   }
83 /// }
/// VTKM_BENCHMARK_OPTS(MyParameterSweep, ->ArgName("Param")->Range(32, 1024 * 1024));
85 /// ```
86 ///
/// will generate and launch several benchmarks, exploring the parameter space of
/// `SetSomeParameter` between the values of 32 and (1024*1024). The chain of
/// function calls in the second argument is applied to an instance of
/// ::benchmark::internal::Benchmark. See Google Benchmark's documentation for
/// more details.
///
/// For more complex benchmark configurations, the VTKM_BENCHMARK_APPLY macro
/// accepts a function with the signature
/// `void Func(::benchmark::internal::Benchmark*)` that may be used to generate
/// more complex configurations.
97 ///
/// To instantiate a templated benchmark across a list of types, the
/// VTKM_BENCHMARK_TEMPLATES* macros take a vtkm::List of types as an additional
/// parameter. The templated benchmark function will be instantiated and called
/// for each type in the list:
102 ///
103 /// ```
104 /// template <typename T>
105 /// void MyBenchmark(::benchmark::State& state)
106 /// {
107 ///   MyClass<T> someClass;
108 ///
109 ///   // Must use a vtkm timer to properly capture eg. CUDA execution times.
110 ///   vtkm::cont::Timer timer;
111 ///   for (auto _ : state)
112 ///   {
113 ///     someClass.Reset();
114 ///
115 ///     timer.Start();
116 ///     someClass.DoWork();
117 ///     timer.Stop();
118 ///
119 ///     state.SetIterationTime(timer.GetElapsedTime());
120 ///   }
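/// }
/// // Name the type list through an alias: a `vtkm::List<A, B>` written directly
/// // in the macro call would be split at its comma by the preprocessor.
/// using MyTypes = vtkm::List<vtkm::Float32, vtkm::Vec3f_32>;
/// VTKM_BENCHMARK_TEMPLATES(MyBenchmark, MyTypes);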
124 /// ```
125 ///
126 /// The benchmarks are executed by calling the `VTKM_EXECUTE_BENCHMARKS(argc, argv)`
127 /// macro from `main`. There is also a `VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, some_string)`
128 /// macro that appends the contents of `some_string` to the Google Benchmark preamble.
129 ///
130 /// If a benchmark is not compatible with some configuration, it may call
131 /// `state.SkipWithError("Error message");` on the `::benchmark::State` object and return. This is
132 /// useful, for instance in the filter tests when the input is not compatible with the filter.
133 ///
134 /// When launching a benchmark executable, the following options are supported by Google Benchmark:
135 ///
136 /// - `--benchmark_list_tests`: List all available tests.
/// - `--benchmark_filter="[regex]"`: Only run benchmarks with names that match `[regex]`.
/// - `--benchmark_filter="-[regex]"`: Only run benchmarks with names that DON'T match `[regex]`.
139 /// - `--benchmark_min_time=[float]`: Make sure each benchmark repetition gathers `[float]` seconds
140 ///   of data.
141 /// - `--benchmark_repetitions=[int]`: Run each benchmark `[int]` times and report aggregate statistics
142 ///   (mean, stdev, etc). A "repetition" refers to a single execution of the benchmark function, not
143 ///   an "iteration", which is a loop of the `for(auto _:state){...}` section.
144 /// - `--benchmark_report_aggregates_only="true|false"`: If true, only the aggregate statistics are
145 ///   reported (affects both console and file output). Requires `--benchmark_repetitions` to be useful.
146 /// - `--benchmark_display_aggregates_only="true|false"`: If true, only the aggregate statistics are
147 ///   printed to the terminal. Any file output will still contain all repetition info.
148 /// - `--benchmark_format="console|json|csv"`: Specify terminal output format: human readable
149 ///   (`console`) or `csv`/`json` formats.
150 /// - `--benchmark_out_format="console|json|csv"`: Specify file output format: human readable
151 ///   (`console`) or `csv`/`json` formats.
152 /// - `--benchmark_out=[filename]`: Specify output file.
153 /// - `--benchmark_color="true|false"`: Toggle color output in terminal when using `console` output.
154 /// - `--benchmark_counters_tabular="true|false"`: Print counter information (e.g. bytes/sec, items/sec)
155 ///   in the table, rather than appending them as a label.
156 ///
157 /// For more information and examples of practical usage, take a look at the existing benchmarks in
158 /// vtk-m/benchmarking/.
159 
/// \def VTKM_EXECUTE_BENCHMARKS(argc, argv)
///
/// Executes the registered benchmarks by forwarding `argc` and `argv` to
/// `vtkm::bench::detail::ExecuteBenchmarks`. The command line arguments may
/// filter or modify the benchmarks that run; the Google Benchmark
/// documentation describes the recognized flags.
#define VTKM_EXECUTE_BENCHMARKS(argc, argv) \
  vtkm::bench::detail::ExecuteBenchmarks(argc, argv)
166 
/// \def VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, userPreamble)
///
/// Same as `VTKM_EXECUTE_BENCHMARKS`, with one extra argument: the
/// `userPreamble` string is forwarded to
/// `vtkm::bench::detail::ExecuteBenchmarks` and is appended to the preamble
/// that is printed before the benchmark results. As with the plain macro, the
/// command line arguments may filter or modify the benchmarks; see the Google
/// Benchmark documentation for details.
#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, userPreamble) \
  vtkm::bench::detail::ExecuteBenchmarks(argc, argv, userPreamble)
175 
/// \def VTKM_BENCHMARK(Func)
///
/// Registers `Func` as a single benchmark that uses manual timing and reports
/// results in milliseconds. `Func` must have the signature:
///
/// ```
/// void Func(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK(Func) \
  BENCHMARK(Func)->UseManualTime()->Unit(benchmark::kMillisecond)
186 
/// \def VTKM_BENCHMARK_OPTS(BenchFunc, Options)
///
/// Like `VTKM_BENCHMARK`, except that `Options` -- a chain of member calls
/// written with a leading `->` -- is appended to the registration expression
/// and therefore acts on the `::benchmark::internal::Benchmark` object:
///
/// ```
/// VTKM_BENCHMARK_OPTS(MyBenchmark, ->ArgName("MyParam")->Range(32, 1024*1024));
/// ```
///
/// This mirrors the raw Google Benchmark form
/// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. The
/// Google Benchmark documentation lists the available options.
#define VTKM_BENCHMARK_OPTS(BenchFunc, Options) \
  BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond) Options
201 
/// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
///
/// Like `VTKM_BENCHMARK`, except that `ConfigFunc` is first handed to the
/// benchmark's `Apply` method, in the manner of Google Benchmark's
/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. Manual timing and millisecond
/// units are set after `ConfigFunc` has been applied. `ConfigFunc` must have
/// the signature:
///
/// ```
/// void ConfigFunc(::benchmark::internal::Benchmark*);
/// ```
///
/// The Google Benchmark documentation lists the available options.
#define VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc) \
  BENCHMARK(BenchFunc)->Apply(ConfigFunc)->UseManualTime()->Unit(benchmark::kMillisecond)
216 
/// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, Types)
///
/// Registers a family of benchmarks that differ only by template argument:
/// one benchmark running `BenchFunc<T>` is generated for every type `T` in
/// `Types` (a vtkm::List of types). No additional configuration is applied;
/// the macro forwards to `VTKM_BENCHMARK_TEMPLATES_APPLY` with
/// `vtkm::bench::detail::NullApply`. `BenchFunc` must have the signature:
///
/// ```
/// template <typename T>
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, Types) \
  VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, Types)
229 
/// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Options, TypeList)
///
/// Like `VTKM_BENCHMARK_TEMPLATES`, except that `Options` -- a chain of member
/// calls written with a leading `->` -- is applied to the
/// `::benchmark::internal::Benchmark` object of every generated benchmark.
/// The options are wrapped in a lambda that is passed on to
/// `VTKM_BENCHMARK_TEMPLATES_APPLY`. Example usage:
///
/// ```
/// // Use an alias for the list: commas inside `vtkm::List<...>` written
/// // directly in the macro call would be treated as argument separators.
/// using MyTypes = vtkm::List<vtkm::Float32, vtkm::Vec3f_32>;
/// VTKM_BENCHMARK_TEMPLATES_OPTS(MyBenchmark,
///                               ->ArgName("MyParam")->Range(32, 1024*1024),
///                               MyTypes);
/// ```
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Options, TypeList) \
  VTKM_BENCHMARK_TEMPLATES_APPLY( \
    BenchFunc, \
    [](::benchmark::internal::Benchmark* bench) { bench Options->Unit(benchmark::kMillisecond); }, \
    TypeList)
245 
/// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
///
/// Like `VTKM_BENCHMARK_TEMPLATES`, except that every generated benchmark is
/// additionally configured by `ConfigFunc`, in the manner of Google
/// Benchmark's `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must
/// have the signature:
///
/// ```
/// void ConfigFunc(::benchmark::internal::Benchmark*);
/// ```
///
/// The macro expands to a wrapper struct in an anonymous namespace plus an
/// `int` variable whose initializer performs the registration through
/// `vtkm::bench::detail::GenerateTemplateBenchmarks`.
///
/// The Google Benchmark documentation lists the available options.
#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList) \
  namespace \
  { \
  /* A function template cannot be passed as a template parameter, so it is wrapped in a \
   * struct template that the GenerateTemplateBenchmarks class can instantiate. */ \
  template <typename... BenchTypes> \
  struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
  { \
    static ::benchmark::internal::Function* GetFunction() \
    { \
      return BenchFunc<BenchTypes...>; \
    } \
  }; \
  } /* end anon namespace */ \
  int BENCHMARK_PRIVATE_NAME(BenchFunc) = \
    vtkm::bench::detail::GenerateTemplateBenchmarks< \
      brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>, \
      TypeList>::Register(#BenchFunc, ConfigFunc)
271 
// Internal use only: name of the wrapper struct emitted by
// VTKM_BENCHMARK_TEMPLATES_APPLY, assembled from the `_wrapper_` prefix, the
// benchmark function name and the current line number.
#define VTKM_BENCHMARK_WRAPPER_NAME(BenchName) \
  BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchName, __LINE__)
275 
276 namespace vtkm
277 {
278 namespace bench
279 {
280 namespace detail
281 {
282 
NullApply(::benchmark::internal::Benchmark *)283 static inline void NullApply(::benchmark::internal::Benchmark*) {}
284 
285 /// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
286 /// instead.
287 // TypeLists could be expanded to compute cross products if we ever have that
288 // need.
289 template <typename BoundBench, typename TypeLists>
290 struct GenerateTemplateBenchmarks;
291 
292 template <template <typename...> class BenchType, typename TypeList>
293 struct GenerateTemplateBenchmarks<brigand::bind<BenchType>, TypeList>
294 {
295 private:
296   template <typename T>
297   using MakeBenchType = BenchType<T>;
298 
299   using Benchmarks = brigand::transform<TypeList, brigand::bind<MakeBenchType, brigand::_1>>;
300 
301   template <typename ApplyFunctor>
302   struct RegisterImpl
303   {
304     std::string BenchName;
305     ApplyFunctor Apply;
306 
307     template <typename P>
308     void operator()(brigand::type_<BenchType<P>>) const
309     {
310       std::ostringstream name;
311       name << this->BenchName << "<" << vtkm::testing::TypeName<P>::Name() << ">";
312       auto bm = ::benchmark::internal::RegisterBenchmarkInternal(
313         new ::benchmark::internal::FunctionBenchmark(name.str().c_str(),
314                                                      BenchType<P>::GetFunction()));
315       this->Apply(bm);
316 
317       // Always use manual time with vtkm::cont::Timer to capture CUDA times accurately.
318       bm->UseManualTime()->Unit(benchmark::kMillisecond);
319     }
320   };
321 
322 public:
323   template <typename ApplyFunctor>
324   static int Register(const std::string& benchName, ApplyFunctor&& apply)
325   {
326     brigand::for_each<Benchmarks>(
327       RegisterImpl<ApplyFunctor>{ benchName, std::forward<ApplyFunctor>(apply) });
328     return 0;
329   }
330 };
331 
332 class VTKmConsoleReporter : public ::benchmark::ConsoleReporter
333 {
334   std::string UserPreamble;
335 
336 public:
337   VTKmConsoleReporter() = default;
338 
339   explicit VTKmConsoleReporter(const std::string& preamble)
340     : UserPreamble{ preamble }
341   {
342   }
343 
344   bool ReportContext(const Context& context) override
345   {
346     if (!::benchmark::ConsoleReporter::ReportContext(context))
347     {
348       return false;
349     }
350 
351     // The rest of the preamble is printed to the error stream, so be consistent:
352     auto& out = this->GetErrorStream();
353 
354     // Print list of devices:
355     out << "VTK-m Device State:\n";
356     vtkm::cont::GetRuntimeDeviceTracker().PrintSummary(out);
357     if (!this->UserPreamble.empty())
358     {
359       out << this->UserPreamble << "\n";
360     }
361     out.flush();
362 
363     return true;
364   }
365 };
366 
367 // Returns the number of executed benchmarks:
368 static inline vtkm::Id ExecuteBenchmarks(int& argc,
369                                          char* argv[],
370                                          const std::string& preamble = std::string{})
371 {
372   ::benchmark::Initialize(&argc, argv);
373   if (::benchmark::ReportUnrecognizedArguments(argc, argv))
374   {
375     return 1;
376   }
377 
378   VTKmConsoleReporter reporter{ preamble };
379 
380   vtkm::cont::Timer timer;
381   timer.Start();
382   std::size_t num = ::benchmark::RunSpecifiedBenchmarks(&reporter);
383   timer.Stop();
384 
385   reporter.GetOutputStream().flush();
386   reporter.GetErrorStream().flush();
387 
388   reporter.GetErrorStream() << "Ran " << num << " benchmarks in " << timer.GetElapsedTime()
389                             << " seconds." << std::endl;
390 
391   return static_cast<vtkm::Id>(num);
392 }
393 
394 void InitializeArgs(int* argc, std::vector<char*>& args, vtkm::cont::InitializeOptions& opts)
395 {
396   bool isHelp = false;
397 
398   // Inject --help
399   if (*argc == 1)
400   {
401     const char* help = "--help"; // We want it to be static
402     args.push_back(const_cast<char*>(help));
403     *argc = *argc + 1;
404   }
405 
406   args.push_back(nullptr);
407 
408   for (size_t i = 0; i < static_cast<size_t>(*argc); ++i)
409   {
410     auto opt_s = std::string(args[i]);
411     if (opt_s == "--help" || opt_s == "-help" || opt_s == "-h")
412     {
413       isHelp = true;
414     }
415   }
416 
417   if (!isHelp)
418   {
419     return;
420   }
421 
422   opts = vtkm::cont::InitializeOptions::None;
423 }
424 }
425 }
426 } // end namespace vtkm::bench::detail
427 
428 #endif
429