1 //============================================================================
2 // Copyright (c) Kitware, Inc.
3 // All rights reserved.
4 // See LICENSE.txt for details.
5 // This software is distributed WITHOUT ANY WARRANTY; without even
6 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
7 // PURPOSE. See the above copyright notice for more information.
8 //
9 // Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
10 // Copyright 2018 UT-Battelle, LLC.
11 // Copyright 2018 Los Alamos National Security.
12 //
13 // Under the terms of Contract DE-NA0003525 with NTESS,
14 // the U.S. Government retains certain rights in this software.
15 //
16 // Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
17 // Laboratory (LANL), the U.S. Government retains certain rights in
18 // this software.
19 //============================================================================
20
#include "Benchmarker.h"

#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/AtomicArray.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>

#include <vtkm/exec/FunctorBase.h>

#include <exception>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
33
34 namespace vtkm
35 {
36 namespace benchmarking
37 {
38
39 // This is 32x larger than the largest array size.
40 static constexpr vtkm::Id NumWrites = 33554432; // 2^25
41
// Invokes VTKM_MAKE_BENCHMARK for one array size. The benchmark identifier is
// formed by pasting Size onto Name (e.g. AddSeq + 512 -> AddSeq512) and Size
// is forwarded as the trailing argument.
#define MAKE_ATOMIC_BENCHMARK_FOR_SIZE(Name, Class, Size)                                          \
  VTKM_MAKE_BENCHMARK(Name##Size, Class, Size)

// Declares one benchmark per array size (1, 8, 32, 512, 2048, 32768, 1048576)
// for the benchmark template Class. The final entry deliberately has no
// trailing semicolon; the caller supplies it.
#define MAKE_ATOMIC_BENCHMARKS(Name, Class)                                                        \
  MAKE_ATOMIC_BENCHMARK_FOR_SIZE(Name, Class, 1);                                                  \
  MAKE_ATOMIC_BENCHMARK_FOR_SIZE(Name, Class, 8);                                                  \
  MAKE_ATOMIC_BENCHMARK_FOR_SIZE(Name, Class, 32);                                                 \
  MAKE_ATOMIC_BENCHMARK_FOR_SIZE(Name, Class, 512);                                                \
  MAKE_ATOMIC_BENCHMARK_FOR_SIZE(Name, Class, 2048);                                               \
  MAKE_ATOMIC_BENCHMARK_FOR_SIZE(Name, Class, 32768);                                              \
  MAKE_ATOMIC_BENCHMARK_FOR_SIZE(Name, Class, 1048576)
50
// Invokes VTKM_RUN_BENCHMARK for the benchmark named <Name><Size> over the
// value types listed in vtkm::cont::AtomicArrayTypeListTag.
#define RUN_ATOMIC_BENCHMARK_FOR_SIZE(Name, Size)                                                  \
  VTKM_RUN_BENCHMARK(Name##Size, vtkm::cont::AtomicArrayTypeListTag{})

// Runs the benchmarks declared by MAKE_ATOMIC_BENCHMARKS(Name, ...) in order
// of increasing array size. The final entry deliberately has no trailing
// semicolon; the caller supplies it.
#define RUN_ATOMIC_BENCHMARKS(Name)                                                                \
  RUN_ATOMIC_BENCHMARK_FOR_SIZE(Name, 1);                                                          \
  RUN_ATOMIC_BENCHMARK_FOR_SIZE(Name, 8);                                                          \
  RUN_ATOMIC_BENCHMARK_FOR_SIZE(Name, 32);                                                         \
  RUN_ATOMIC_BENCHMARK_FOR_SIZE(Name, 512);                                                        \
  RUN_ATOMIC_BENCHMARK_FOR_SIZE(Name, 2048);                                                       \
  RUN_ATOMIC_BENCHMARK_FOR_SIZE(Name, 32768);                                                      \
  RUN_ATOMIC_BENCHMARK_FOR_SIZE(Name, 1048576)
59
60 template <class Device>
61 class BenchmarkAtomicArray
62 {
63 public:
64 using Algo = vtkm::cont::DeviceAdapterAlgorithm<Device>;
65 using Timer = vtkm::cont::Timer<Device>;
66
67 // Benchmarks AtomicArray::Add such that each work index writes to adjacent
68 // indices.
69 template <typename ValueType>
70 struct BenchAddSeq
71 {
72 vtkm::Id ArraySize;
73 vtkm::cont::ArrayHandle<ValueType> Data;
74
75 template <typename PortalType>
76 struct Worker : public vtkm::exec::FunctorBase
77 {
78 vtkm::Id ArraySize;
79 PortalType Portal;
80
81 VTKM_CONT
Workervtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeq::Worker82 Worker(vtkm::Id arraySize, PortalType portal)
83 : ArraySize(arraySize)
84 , Portal(portal)
85 {
86 }
87
88 VTKM_EXEC
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeq::Worker89 void operator()(vtkm::Id i) const { this->Portal.Add(i % this->ArraySize, 1); }
90 };
91
BenchAddSeqvtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeq92 BenchAddSeq(vtkm::Id arraySize)
93 : ArraySize(arraySize)
94 {
95 this->Data.PrepareForOutput(this->ArraySize, Device{});
96 }
97
98 VTKM_CONT
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeq99 vtkm::Float64 operator()()
100 {
101 vtkm::cont::AtomicArray<ValueType> array(this->Data);
102 auto portal = array.PrepareForExecution(Device{});
103 Worker<decltype(portal)> worker{ this->ArraySize, portal };
104
105 Timer timer;
106 Algo::Schedule(worker, NumWrites);
107 return timer.GetElapsedTime();
108 }
109
110 VTKM_CONT
Descriptionvtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeq111 std::string Description() const
112 {
113 std::ostringstream desc;
114 desc << "Add (Seq, Atomic, " << std::setw(7) << std::setfill('0') << this->ArraySize << ")";
115 return desc.str();
116 }
117 };
118 MAKE_ATOMIC_BENCHMARKS(AddSeq, BenchAddSeq);
119
120 // Provides a non-atomic baseline for BenchAddSeq
121 template <typename ValueType>
122 struct BenchAddSeqBaseline
123 {
124 vtkm::Id ArraySize;
125 vtkm::cont::ArrayHandle<ValueType> Data;
126
127 template <typename PortalType>
128 struct Worker : public vtkm::exec::FunctorBase
129 {
130 vtkm::Id ArraySize;
131 PortalType Portal;
132
133 VTKM_CONT
Workervtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeqBaseline::Worker134 Worker(vtkm::Id arraySize, PortalType portal)
135 : ArraySize(arraySize)
136 , Portal(portal)
137 {
138 }
139
140 VTKM_EXEC
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeqBaseline::Worker141 void operator()(vtkm::Id i) const
142 {
143 vtkm::Id idx = i % this->ArraySize;
144 this->Portal.Set(idx, this->Portal.Get(idx) + 1);
145 }
146 };
147
BenchAddSeqBaselinevtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeqBaseline148 BenchAddSeqBaseline(vtkm::Id arraySize)
149 : ArraySize(arraySize)
150 {
151 }
152
153 VTKM_CONT
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeqBaseline154 vtkm::Float64 operator()()
155 {
156 auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{});
157 Worker<decltype(portal)> worker{ this->ArraySize, portal };
158
159 Timer timer;
160 Algo::Schedule(worker, NumWrites);
161 return timer.GetElapsedTime();
162 }
163
164 VTKM_CONT
Descriptionvtkm::benchmarking::BenchmarkAtomicArray::BenchAddSeqBaseline165 std::string Description() const
166 {
167 std::ostringstream desc;
168 desc << "Add (Seq, Baseline, " << std::setw(7) << std::setfill('0') << this->ArraySize << ")";
169 return desc.str();
170 }
171 };
172 MAKE_ATOMIC_BENCHMARKS(AddSeqBase, BenchAddSeqBaseline);
173
174 // Benchmarks AtomicArray::Add such that each work index writes to a strided
175 // index ( floor(i / stride) + stride * (i % stride)
176 template <typename ValueType>
177 struct BenchAddStride
178 {
179 vtkm::Id ArraySize;
180 vtkm::Id Stride;
181 vtkm::cont::ArrayHandle<ValueType> Data;
182
183 template <typename PortalType>
184 struct Worker : public vtkm::exec::FunctorBase
185 {
186 vtkm::Id ArraySize;
187 vtkm::Id Stride;
188 PortalType Portal;
189
190 VTKM_CONT
Workervtkm::benchmarking::BenchmarkAtomicArray::BenchAddStride::Worker191 Worker(vtkm::Id arraySize, vtkm::Id stride, PortalType portal)
192 : ArraySize(arraySize)
193 , Stride(stride)
194 , Portal(portal)
195 {
196 }
197
198 VTKM_EXEC
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchAddStride::Worker199 void operator()(vtkm::Id i) const
200 {
201 vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % this->ArraySize;
202 this->Portal.Add(idx % this->ArraySize, 1);
203 }
204 };
205
BenchAddStridevtkm::benchmarking::BenchmarkAtomicArray::BenchAddStride206 BenchAddStride(vtkm::Id arraySize, vtkm::Id stride = 32)
207 : ArraySize(arraySize)
208 , Stride(stride)
209 {
210 this->Data.PrepareForOutput(this->ArraySize, Device{});
211 }
212
213 VTKM_CONT
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchAddStride214 vtkm::Float64 operator()()
215 {
216 vtkm::cont::AtomicArray<ValueType> array(this->Data);
217 auto portal = array.PrepareForExecution(Device{});
218 Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
219
220 Timer timer;
221 Algo::Schedule(worker, NumWrites);
222 return timer.GetElapsedTime();
223 }
224
225 VTKM_CONT
Descriptionvtkm::benchmarking::BenchmarkAtomicArray::BenchAddStride226 std::string Description() const
227 {
228 std::ostringstream desc;
229 desc << "Add (Stride=" << this->Stride << ", Atomic, " << std::setw(7) << std::setfill('0')
230 << this->ArraySize << ")";
231 return desc.str();
232 }
233 };
234 MAKE_ATOMIC_BENCHMARKS(AddStride, BenchAddStride);
235
236 // Non-atomic baseline for AddStride
237 template <typename ValueType>
238 struct BenchAddStrideBaseline
239 {
240 vtkm::Id ArraySize;
241 vtkm::Id Stride;
242 vtkm::cont::ArrayHandle<ValueType> Data;
243
244 template <typename PortalType>
245 struct Worker : public vtkm::exec::FunctorBase
246 {
247 vtkm::Id ArraySize;
248 vtkm::Id Stride;
249 PortalType Portal;
250
251 VTKM_CONT
Workervtkm::benchmarking::BenchmarkAtomicArray::BenchAddStrideBaseline::Worker252 Worker(vtkm::Id arraySize, vtkm::Id stride, PortalType portal)
253 : ArraySize(arraySize)
254 , Stride(stride)
255 , Portal(portal)
256 {
257 }
258
259 VTKM_EXEC
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchAddStrideBaseline::Worker260 void operator()(vtkm::Id i) const
261 {
262 vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % this->ArraySize;
263 this->Portal.Set(idx, this->Portal.Get(idx) + 1);
264 }
265 };
266
BenchAddStrideBaselinevtkm::benchmarking::BenchmarkAtomicArray::BenchAddStrideBaseline267 BenchAddStrideBaseline(vtkm::Id arraySize, vtkm::Id stride = 32)
268 : ArraySize(arraySize)
269 , Stride(stride)
270 {
271 }
272
273 VTKM_CONT
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchAddStrideBaseline274 vtkm::Float64 operator()()
275 {
276 auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{});
277 Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
278
279 Timer timer;
280 Algo::Schedule(worker, NumWrites);
281 return timer.GetElapsedTime();
282 }
283
284 VTKM_CONT
Descriptionvtkm::benchmarking::BenchmarkAtomicArray::BenchAddStrideBaseline285 std::string Description() const
286 {
287 std::ostringstream desc;
288 desc << "Add (Stride=" << this->Stride << ", Baseline, " << std::setw(7) << std::setfill('0')
289 << this->ArraySize << ")";
290 return desc.str();
291 }
292 };
293 MAKE_ATOMIC_BENCHMARKS(AddStrideBase, BenchAddStrideBaseline);
294
295 // Benchmarks AtomicArray::CompareAndSwap such that each work index writes to adjacent
296 // indices.
297 template <typename ValueType>
298 struct BenchCASSeq
299 {
300 vtkm::Id ArraySize;
301 vtkm::cont::ArrayHandle<ValueType> Data;
302
303 template <typename PortalType>
304 struct Worker : public vtkm::exec::FunctorBase
305 {
306 vtkm::Id ArraySize;
307 PortalType Portal;
308
309 VTKM_CONT
Workervtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeq::Worker310 Worker(vtkm::Id arraySize, PortalType portal)
311 : ArraySize(arraySize)
312 , Portal(portal)
313 {
314 }
315
316 VTKM_EXEC
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeq::Worker317 void operator()(vtkm::Id i) const
318 {
319 vtkm::Id idx = i % this->ArraySize;
320 ValueType val = static_cast<ValueType>(i);
321 // Get the old val with a no-op
322 ValueType oldVal = this->Portal.Add(idx, static_cast<ValueType>(0));
323 ValueType assumed = static_cast<ValueType>(0);
324 do
325 {
326 assumed = oldVal;
327 oldVal = this->Portal.CompareAndSwap(idx, assumed + val, assumed);
328 } while (assumed != oldVal);
329 }
330 };
331
BenchCASSeqvtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeq332 BenchCASSeq(vtkm::Id arraySize)
333 : ArraySize(arraySize)
334 {
335 this->Data.PrepareForOutput(this->ArraySize, Device{});
336 }
337
338 VTKM_CONT
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeq339 vtkm::Float64 operator()()
340 {
341 vtkm::cont::AtomicArray<ValueType> array(this->Data);
342 auto portal = array.PrepareForExecution(Device{});
343 Worker<decltype(portal)> worker{ this->ArraySize, portal };
344
345 Timer timer;
346 Algo::Schedule(worker, NumWrites);
347 return timer.GetElapsedTime();
348 }
349
350 VTKM_CONT
Descriptionvtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeq351 std::string Description() const
352 {
353 std::ostringstream desc;
354 desc << "CAS (Seq, Atomic, " << std::setw(7) << std::setfill('0') << this->ArraySize << ")";
355 return desc.str();
356 }
357 };
358 MAKE_ATOMIC_BENCHMARKS(CASSeq, BenchCASSeq);
359
360 // Provides a non-atomic baseline for BenchCASSeq
361 template <typename ValueType>
362 struct BenchCASSeqBaseline
363 {
364 vtkm::Id ArraySize;
365 vtkm::cont::ArrayHandle<ValueType> Data;
366
367 template <typename PortalType>
368 struct Worker : public vtkm::exec::FunctorBase
369 {
370 vtkm::Id ArraySize;
371 PortalType Portal;
372
373 VTKM_CONT
Workervtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeqBaseline::Worker374 Worker(vtkm::Id arraySize, PortalType portal)
375 : ArraySize(arraySize)
376 , Portal(portal)
377 {
378 }
379
380 VTKM_EXEC
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeqBaseline::Worker381 void operator()(vtkm::Id i) const
382 {
383 vtkm::Id idx = i % this->ArraySize;
384 ValueType val = static_cast<ValueType>(i);
385 ValueType oldVal = this->Portal.Get(idx);
386 this->Portal.Set(idx, oldVal + val);
387 }
388 };
389
BenchCASSeqBaselinevtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeqBaseline390 BenchCASSeqBaseline(vtkm::Id arraySize)
391 : ArraySize(arraySize)
392 {
393 }
394
395 VTKM_CONT
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeqBaseline396 vtkm::Float64 operator()()
397 {
398 auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{});
399 Worker<decltype(portal)> worker{ this->ArraySize, portal };
400
401 Timer timer;
402 Algo::Schedule(worker, NumWrites);
403 return timer.GetElapsedTime();
404 }
405
406 VTKM_CONT
Descriptionvtkm::benchmarking::BenchmarkAtomicArray::BenchCASSeqBaseline407 std::string Description() const
408 {
409 std::ostringstream desc;
410 desc << "CAS (Seq, Baseline, " << std::setw(7) << std::setfill('0') << this->ArraySize << ")";
411 return desc.str();
412 }
413 };
414 MAKE_ATOMIC_BENCHMARKS(CASSeqBase, BenchCASSeqBaseline);
415
416 // Benchmarks AtomicArray::CompareAndSwap such that each work index writes to
417 // a strided index:
418 // ( floor(i / stride) + stride * (i % stride)
419 template <typename ValueType>
420 struct BenchCASStride
421 {
422 vtkm::Id ArraySize;
423 vtkm::Id Stride;
424 vtkm::cont::ArrayHandle<ValueType> Data;
425
426 template <typename PortalType>
427 struct Worker : public vtkm::exec::FunctorBase
428 {
429 vtkm::Id ArraySize;
430 vtkm::Id Stride;
431 PortalType Portal;
432
433 VTKM_CONT
Workervtkm::benchmarking::BenchmarkAtomicArray::BenchCASStride::Worker434 Worker(vtkm::Id arraySize, vtkm::Id stride, PortalType portal)
435 : ArraySize(arraySize)
436 , Stride(stride)
437 , Portal(portal)
438 {
439 }
440
441 VTKM_EXEC
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchCASStride::Worker442 void operator()(vtkm::Id i) const
443 {
444 vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % this->ArraySize;
445 ValueType val = static_cast<ValueType>(i);
446 // Get the old val with a no-op
447 ValueType oldVal = this->Portal.Add(idx, static_cast<ValueType>(0));
448 ValueType assumed = static_cast<ValueType>(0);
449 do
450 {
451 assumed = oldVal;
452 oldVal = this->Portal.CompareAndSwap(idx, assumed + val, assumed);
453 } while (assumed != oldVal);
454 }
455 };
456
BenchCASStridevtkm::benchmarking::BenchmarkAtomicArray::BenchCASStride457 BenchCASStride(vtkm::Id arraySize, vtkm::Id stride = 32)
458 : ArraySize(arraySize)
459 , Stride(stride)
460 {
461 this->Data.PrepareForOutput(this->ArraySize, Device{});
462 }
463
464 VTKM_CONT
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchCASStride465 vtkm::Float64 operator()()
466 {
467 vtkm::cont::AtomicArray<ValueType> array(this->Data);
468 auto portal = array.PrepareForExecution(Device{});
469 Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
470
471 Timer timer;
472 Algo::Schedule(worker, NumWrites);
473 return timer.GetElapsedTime();
474 }
475
476 VTKM_CONT
Descriptionvtkm::benchmarking::BenchmarkAtomicArray::BenchCASStride477 std::string Description() const
478 {
479 std::ostringstream desc;
480 desc << "CAS (Stride=" << this->Stride << ", Atomic, " << std::setw(7) << std::setfill('0')
481 << this->ArraySize << ")";
482 return desc.str();
483 }
484 };
485 MAKE_ATOMIC_BENCHMARKS(CASStride, BenchCASStride);
486
487 // Non-atomic baseline for CASStride
488 template <typename ValueType>
489 struct BenchCASStrideBaseline
490 {
491 vtkm::Id ArraySize;
492 vtkm::Id Stride;
493 vtkm::cont::ArrayHandle<ValueType> Data;
494
495 template <typename PortalType>
496 struct Worker : public vtkm::exec::FunctorBase
497 {
498 vtkm::Id ArraySize;
499 vtkm::Id Stride;
500 PortalType Portal;
501
502 VTKM_CONT
Workervtkm::benchmarking::BenchmarkAtomicArray::BenchCASStrideBaseline::Worker503 Worker(vtkm::Id arraySize, vtkm::Id stride, PortalType portal)
504 : ArraySize(arraySize)
505 , Stride(stride)
506 , Portal(portal)
507 {
508 }
509
510 VTKM_EXEC
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchCASStrideBaseline::Worker511 void operator()(vtkm::Id i) const
512 {
513 vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % this->ArraySize;
514 ValueType val = static_cast<ValueType>(i);
515 ValueType oldVal = this->Portal.Get(idx);
516 this->Portal.Set(idx, oldVal + val);
517 }
518 };
519
BenchCASStrideBaselinevtkm::benchmarking::BenchmarkAtomicArray::BenchCASStrideBaseline520 BenchCASStrideBaseline(vtkm::Id arraySize, vtkm::Id stride = 32)
521 : ArraySize(arraySize)
522 , Stride(stride)
523 {
524 }
525
526 VTKM_CONT
operator ()vtkm::benchmarking::BenchmarkAtomicArray::BenchCASStrideBaseline527 vtkm::Float64 operator()()
528 {
529 auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{});
530 Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
531
532 Timer timer;
533 Algo::Schedule(worker, NumWrites);
534 return timer.GetElapsedTime();
535 }
536
537 VTKM_CONT
Descriptionvtkm::benchmarking::BenchmarkAtomicArray::BenchCASStrideBaseline538 std::string Description() const
539 {
540 std::ostringstream desc;
541 desc << "CAS (Stride=" << this->Stride << ", Baseline, " << std::setw(7) << std::setfill('0')
542 << this->ArraySize << ")";
543 return desc.str();
544 }
545 };
546 MAKE_ATOMIC_BENCHMARKS(CASStrideBase, BenchCASStrideBaseline);
547
Run()548 static void Run()
549 {
550 RUN_ATOMIC_BENCHMARKS(AddSeq);
551 RUN_ATOMIC_BENCHMARKS(AddSeqBase);
552 RUN_ATOMIC_BENCHMARKS(AddStride);
553 RUN_ATOMIC_BENCHMARKS(AddStrideBase);
554
555 RUN_ATOMIC_BENCHMARKS(CASSeq);
556 RUN_ATOMIC_BENCHMARKS(CASSeqBase);
557 RUN_ATOMIC_BENCHMARKS(CASStride);
558 RUN_ATOMIC_BENCHMARKS(CASStrideBase);
559 }
560 };
561 }
562 } // end namespace vtkm::benchmarking
563
main(int,char * [])564 int main(int, char* [])
565 {
566 using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG;
567 auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
568 tracker.ForceDevice(Device{});
569
570 try
571 {
572 vtkm::benchmarking::BenchmarkAtomicArray<Device>::Run();
573 }
574 catch (std::exception& e)
575 {
576 std::cerr << "Benchmark encountered an exception: " << e.what() << "\n";
577 return 1;
578 }
579
580 return 0;
581 }
582