1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
45 #include <stdexcept>
46 #include <sstream>
47 #include <iostream>
48 #include <limits>
49
50 #include <Kokkos_Core.hpp>
51
52 namespace Test {
53
// Empty tag type: selects the tagged operator()/join/final overloads of
// ReduceFunctorFinalTag when used as the work tag of a RangePolicy.
struct ReducerTag {};
55
56 template <typename ScalarType, class DeviceType>
57 class ReduceFunctor {
58 public:
59 using execution_space = DeviceType;
60 using size_type = typename execution_space::size_type;
61
62 struct value_type {
63 ScalarType value[3];
64 };
65
66 const size_type nwork;
67
68 KOKKOS_INLINE_FUNCTION
ReduceFunctor(const size_type & arg_nwork)69 ReduceFunctor(const size_type& arg_nwork) : nwork(arg_nwork) {}
70
71 KOKKOS_INLINE_FUNCTION
ReduceFunctor(const ReduceFunctor & rhs)72 ReduceFunctor(const ReduceFunctor& rhs) : nwork(rhs.nwork) {}
73
74 /*
75 KOKKOS_INLINE_FUNCTION
76 void init( value_type & dst ) const
77 {
78 dst.value[0] = 0;
79 dst.value[1] = 0;
80 dst.value[2] = 0;
81 }
82 */
83
84 KOKKOS_INLINE_FUNCTION
join(volatile value_type & dst,const volatile value_type & src) const85 void join(volatile value_type& dst, const volatile value_type& src) const {
86 dst.value[0] += src.value[0];
87 dst.value[1] += src.value[1];
88 dst.value[2] += src.value[2];
89 }
90
91 KOKKOS_INLINE_FUNCTION
operator ()(size_type iwork,value_type & dst) const92 void operator()(size_type iwork, value_type& dst) const {
93 dst.value[0] += 1;
94 dst.value[1] += iwork + 1;
95 dst.value[2] += nwork - iwork;
96 }
97 };
98
99 template <class DeviceType>
100 class ReduceFunctorFinal : public ReduceFunctor<int64_t, DeviceType> {
101 public:
102 using value_type = typename ReduceFunctor<int64_t, DeviceType>::value_type;
103
104 KOKKOS_INLINE_FUNCTION
ReduceFunctorFinal(const size_t n)105 ReduceFunctorFinal(const size_t n) : ReduceFunctor<int64_t, DeviceType>(n) {}
106
107 KOKKOS_INLINE_FUNCTION
final(value_type & dst) const108 void final(value_type& dst) const {
109 dst.value[0] = -dst.value[0];
110 dst.value[1] = -dst.value[1];
111 dst.value[2] = -dst.value[2];
112 }
113 };
114
115 template <class DeviceType>
116 class ReduceFunctorFinalTag {
117 public:
118 using execution_space = DeviceType;
119 using size_type = typename execution_space::size_type;
120 using ScalarType = int64_t;
121
122 struct value_type {
123 ScalarType value[3];
124 };
125
126 const size_type nwork;
127
128 KOKKOS_INLINE_FUNCTION
ReduceFunctorFinalTag(const size_type arg_nwork)129 ReduceFunctorFinalTag(const size_type arg_nwork) : nwork(arg_nwork) {}
130
131 KOKKOS_INLINE_FUNCTION
join(const ReducerTag,volatile value_type & dst,const volatile value_type & src) const132 void join(const ReducerTag, volatile value_type& dst,
133 const volatile value_type& src) const {
134 dst.value[0] += src.value[0];
135 dst.value[1] += src.value[1];
136 dst.value[2] += src.value[2];
137 }
138
139 KOKKOS_INLINE_FUNCTION
operator ()(const ReducerTag,size_type iwork,value_type & dst) const140 void operator()(const ReducerTag, size_type iwork, value_type& dst) const {
141 dst.value[0] -= 1;
142 dst.value[1] -= iwork + 1;
143 dst.value[2] -= nwork - iwork;
144 }
145
146 KOKKOS_INLINE_FUNCTION
final(const ReducerTag,value_type & dst) const147 void final(const ReducerTag, value_type& dst) const {
148 ++dst.value[0];
149 ++dst.value[1];
150 ++dst.value[2];
151 }
152 };
153
154 template <typename ScalarType, class DeviceType>
155 class RuntimeReduceFunctor {
156 public:
157 // Required for functor:
158 using execution_space = DeviceType;
159 using value_type = ScalarType[];
160 const unsigned value_count;
161
162 // Unit test details:
163
164 using size_type = typename execution_space::size_type;
165
166 const size_type nwork;
167
RuntimeReduceFunctor(const size_type arg_nwork,const size_type arg_count)168 RuntimeReduceFunctor(const size_type arg_nwork, const size_type arg_count)
169 : value_count(arg_count), nwork(arg_nwork) {}
170
171 KOKKOS_INLINE_FUNCTION
init(ScalarType dst[]) const172 void init(ScalarType dst[]) const {
173 for (unsigned i = 0; i < value_count; ++i) dst[i] = 0;
174 }
175
176 KOKKOS_INLINE_FUNCTION
join(volatile ScalarType dst[],const volatile ScalarType src[]) const177 void join(volatile ScalarType dst[], const volatile ScalarType src[]) const {
178 for (unsigned i = 0; i < value_count; ++i) dst[i] += src[i];
179 }
180
181 KOKKOS_INLINE_FUNCTION
operator ()(size_type iwork,ScalarType dst[]) const182 void operator()(size_type iwork, ScalarType dst[]) const {
183 const size_type tmp[3] = {1, iwork + 1, nwork - iwork};
184
185 for (size_type i = 0; i < static_cast<size_type>(value_count); ++i) {
186 dst[i] += tmp[i % 3];
187 }
188 }
189 };
190
// Runtime-sized reduction exercising non-sum join operations:
// odd-indexed components keep a running minimum, even-indexed components
// a running maximum of the per-iteration values (iwork+1, nwork-iwork).
template <typename ScalarType, class DeviceType>
class RuntimeReduceMinMax {
 public:
  // Required for functor:
  using execution_space = DeviceType;
  using value_type = ScalarType[];
  const unsigned value_count;

  // Unit test details:

  using size_type = typename execution_space::size_type;

  const size_type nwork;
  // Reduction identities: amax seeds the min-reduction (odd slots),
  // amin seeds the max-reduction (even slots).
  // NOTE(review): for floating-point ScalarType, numeric_limits::min() is
  // the smallest POSITIVE value, not the most negative (that would be
  // lowest()); the max-reduce identity is only valid here because every
  // contribution is >= 1 — confirm before reusing this functor elsewhere.
  const ScalarType amin;
  const ScalarType amax;

  RuntimeReduceMinMax(const size_type arg_nwork, const size_type arg_count)
      : value_count(arg_count),
        nwork(arg_nwork),
        amin(std::numeric_limits<ScalarType>::min()),
        amax(std::numeric_limits<ScalarType>::max()) {}

  // Seed each slot with the identity of its reduction:
  // odd index -> amax (min-reduce), even index -> amin (max-reduce).
  KOKKOS_INLINE_FUNCTION
  void init(ScalarType dst[]) const {
    for (unsigned i = 0; i < value_count; ++i) {
      dst[i] = i % 2 ? amax : amin;
    }
  }

  // Combine partial results slot by slot with the slot's own operation.
  KOKKOS_INLINE_FUNCTION
  void join(volatile ScalarType dst[], const volatile ScalarType src[]) const {
    for (unsigned i = 0; i < value_count; ++i) {
      dst[i] = i % 2 ? (dst[i] < src[i] ? dst[i] : src[i])  // min
                     : (dst[i] > src[i] ? dst[i] : src[i]);  // max
    }
  }

  // Per-iteration candidates: tmp[0] = iwork+1 feeds even (max) slots,
  // tmp[1] = nwork-iwork feeds odd (min) slots, via tmp[i % 2].
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type iwork, ScalarType dst[]) const {
    const ScalarType tmp[2] = {ScalarType(iwork + 1),
                               ScalarType(nwork - iwork)};

    for (size_type i = 0; i < static_cast<size_type>(value_count); ++i) {
      dst[i] = i % 2 ? (dst[i] < tmp[i % 2] ? dst[i] : tmp[i % 2])
                     : (dst[i] > tmp[i % 2] ? dst[i] : tmp[i % 2]);
    }
  }
};
239
240 template <class DeviceType>
241 class RuntimeReduceFunctorFinal
242 : public RuntimeReduceFunctor<int64_t, DeviceType> {
243 public:
244 using base_type = RuntimeReduceFunctor<int64_t, DeviceType>;
245 using value_type = typename base_type::value_type;
246 using scalar_type = int64_t;
247
RuntimeReduceFunctorFinal(const size_t theNwork,const size_t count)248 RuntimeReduceFunctorFinal(const size_t theNwork, const size_t count)
249 : base_type(theNwork, count) {}
250
251 KOKKOS_INLINE_FUNCTION
final(value_type dst) const252 void final(value_type dst) const {
253 for (unsigned i = 0; i < base_type::value_count; ++i) {
254 dst[i] = -dst[i];
255 }
256 }
257 };
258
// Functor for a combined reduction into three separate scalars of one
// type: dst1 counts iterations, dst2 sums (iwork+1), dst3 sums
// (nwork-iwork).
template <class ValueType, class DeviceType>
class CombinedReduceFunctorSameType {
 public:
  using execution_space = typename DeviceType::execution_space;
  using size_type       = typename execution_space::size_type;

  const size_type nwork;

  KOKKOS_INLINE_FUNCTION
  constexpr explicit CombinedReduceFunctorSameType(const size_type& arg_nwork)
      : nwork(arg_nwork) {}

  KOKKOS_DEFAULTED_FUNCTION
  constexpr CombinedReduceFunctorSameType(
      const CombinedReduceFunctorSameType& rhs) = default;

  // Rank-1 RangePolicy flavor.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type iwork, ValueType& dst1, ValueType& dst2,
                  ValueType& dst3) const {
    dst1 += 1;
    dst2 += iwork + 1;
    dst3 += nwork - iwork;
  }

  // Rank-3 MDRangePolicy flavor: the two extra indices are expected to be
  // zero (the policy extent is 1 in those dimensions), so the totals match
  // the rank-1 overload above.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type iwork, size_type always_zero_1,
                  size_type always_zero_2, ValueType& dst1, ValueType& dst2,
                  ValueType& dst3) const {
    dst1 += 1 + always_zero_1;
    dst2 += iwork + 1 + always_zero_2;
    dst3 += nwork - iwork;
  }
};
292
293 namespace {
294
295 template <typename ScalarType, class DeviceType>
296 class TestReduce {
297 public:
298 using execution_space = DeviceType;
299 using size_type = typename execution_space::size_type;
300
TestReduce(const size_type & nwork)301 TestReduce(const size_type& nwork) {
302 run_test(nwork);
303 run_test_final(nwork);
304 run_test_final_tag(nwork);
305 }
306
run_test(const size_type & nwork)307 void run_test(const size_type& nwork) {
308 using functor_type = Test::ReduceFunctor<ScalarType, execution_space>;
309 using value_type = typename functor_type::value_type;
310
311 enum { Count = 3 };
312 enum { Repeat = 100 };
313
314 value_type result[Repeat];
315
316 const uint64_t nw = nwork;
317 const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
318
319 for (unsigned i = 0; i < Repeat; ++i) {
320 Kokkos::parallel_reduce(nwork, functor_type(nwork), result[i]);
321 }
322
323 for (unsigned i = 0; i < Repeat; ++i) {
324 for (unsigned j = 0; j < Count; ++j) {
325 const uint64_t correct = 0 == j % 3 ? nw : nsum;
326 ASSERT_EQ((ScalarType)correct, result[i].value[j]);
327 }
328 }
329 }
330
run_test_final(const size_type & nwork)331 void run_test_final(const size_type& nwork) {
332 using functor_type = Test::ReduceFunctorFinal<execution_space>;
333 using value_type = typename functor_type::value_type;
334
335 enum { Count = 3 };
336 enum { Repeat = 100 };
337
338 value_type result[Repeat];
339
340 const uint64_t nw = nwork;
341 const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
342
343 for (unsigned i = 0; i < Repeat; ++i) {
344 if (i % 2 == 0) {
345 Kokkos::parallel_reduce(nwork, functor_type(nwork), result[i]);
346 } else {
347 Kokkos::parallel_reduce("Reduce", nwork, functor_type(nwork),
348 result[i]);
349 }
350 }
351
352 for (unsigned i = 0; i < Repeat; ++i) {
353 for (unsigned j = 0; j < Count; ++j) {
354 const uint64_t correct = 0 == j % 3 ? nw : nsum;
355 ASSERT_EQ((ScalarType)correct, -result[i].value[j]);
356 }
357 }
358 }
359
run_test_final_tag(const size_type & nwork)360 void run_test_final_tag(const size_type& nwork) {
361 using functor_type = Test::ReduceFunctorFinalTag<execution_space>;
362 using value_type = typename functor_type::value_type;
363
364 enum { Count = 3 };
365 enum { Repeat = 100 };
366
367 value_type result[Repeat];
368
369 const uint64_t nw = nwork;
370 const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
371
372 for (unsigned i = 0; i < Repeat; ++i) {
373 if (i % 2 == 0) {
374 Kokkos::parallel_reduce(
375 Kokkos::RangePolicy<execution_space, ReducerTag>(0, nwork),
376 functor_type(nwork), result[i]);
377 } else {
378 Kokkos::parallel_reduce(
379 "Reduce",
380 Kokkos::RangePolicy<execution_space, ReducerTag>(0, nwork),
381 functor_type(nwork), result[i]);
382 }
383 }
384
385 for (unsigned i = 0; i < Repeat; ++i) {
386 for (unsigned j = 0; j < Count; ++j) {
387 const uint64_t correct = 0 == j % 3 ? nw : nsum;
388 ASSERT_EQ((ScalarType)correct, 1 - result[i].value[j]);
389 }
390 }
391 }
392 };
393
394 template <typename ScalarType, class DeviceType>
395 class TestReduceDynamic {
396 public:
397 using execution_space = DeviceType;
398 using size_type = typename execution_space::size_type;
399
TestReduceDynamic(const size_type nwork)400 TestReduceDynamic(const size_type nwork) {
401 run_test_dynamic(nwork);
402 run_test_dynamic_minmax(nwork);
403 run_test_dynamic_final(nwork);
404 }
405
run_test_dynamic(const size_type nwork)406 void run_test_dynamic(const size_type nwork) {
407 using functor_type =
408 Test::RuntimeReduceFunctor<ScalarType, execution_space>;
409
410 enum { Count = 3 };
411 enum { Repeat = 100 };
412
413 ScalarType result[Repeat][Count];
414
415 const uint64_t nw = nwork;
416 const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
417
418 for (unsigned i = 0; i < Repeat; ++i) {
419 if (i % 2 == 0) {
420 Kokkos::parallel_reduce(nwork, functor_type(nwork, Count), result[i]);
421 } else {
422 Kokkos::parallel_reduce("Reduce", nwork, functor_type(nwork, Count),
423 result[i]);
424 }
425 }
426
427 for (unsigned i = 0; i < Repeat; ++i) {
428 for (unsigned j = 0; j < Count; ++j) {
429 const uint64_t correct = 0 == j % 3 ? nw : nsum;
430 ASSERT_EQ((ScalarType)correct, result[i][j]);
431 }
432 }
433 }
434
run_test_dynamic_minmax(const size_type nwork)435 void run_test_dynamic_minmax(const size_type nwork) {
436 using functor_type = Test::RuntimeReduceMinMax<ScalarType, execution_space>;
437
438 enum { Count = 2 };
439 enum { Repeat = 100 };
440
441 ScalarType result[Repeat][Count];
442
443 for (unsigned i = 0; i < Repeat; ++i) {
444 if (i % 2 == 0) {
445 Kokkos::parallel_reduce(nwork, functor_type(nwork, Count), result[i]);
446 } else {
447 Kokkos::parallel_reduce("Reduce", nwork, functor_type(nwork, Count),
448 result[i]);
449 }
450 }
451
452 for (unsigned i = 0; i < Repeat; ++i) {
453 for (unsigned j = 0; j < Count; ++j) {
454 if (nwork == 0) {
455 ScalarType amin(std::numeric_limits<ScalarType>::min());
456 ScalarType amax(std::numeric_limits<ScalarType>::max());
457 const ScalarType correct = (j % 2) ? amax : amin;
458 ASSERT_EQ((ScalarType)correct, result[i][j]);
459 } else {
460 const uint64_t correct = j % 2 ? 1 : nwork;
461 ASSERT_EQ((ScalarType)correct, result[i][j]);
462 }
463 }
464 }
465 }
466
run_test_dynamic_final(const size_type nwork)467 void run_test_dynamic_final(const size_type nwork) {
468 using functor_type = Test::RuntimeReduceFunctorFinal<execution_space>;
469
470 enum { Count = 3 };
471 enum { Repeat = 100 };
472
473 typename functor_type::scalar_type result[Repeat][Count];
474
475 const uint64_t nw = nwork;
476 const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
477
478 for (unsigned i = 0; i < Repeat; ++i) {
479 if (i % 2 == 0) {
480 Kokkos::parallel_reduce(nwork, functor_type(nwork, Count), result[i]);
481 } else {
482 Kokkos::parallel_reduce("TestKernelReduce", nwork,
483 functor_type(nwork, Count), result[i]);
484 }
485 }
486
487 for (unsigned i = 0; i < Repeat; ++i) {
488 for (unsigned j = 0; j < Count; ++j) {
489 const uint64_t correct = 0 == j % 3 ? nw : nsum;
490 ASSERT_EQ((ScalarType)correct, -result[i][j]);
491 }
492 }
493 }
494 };
495
496 template <typename ScalarType, class DeviceType>
497 class TestReduceDynamicView {
498 public:
499 using execution_space = DeviceType;
500 using size_type = typename execution_space::size_type;
501
TestReduceDynamicView(const size_type nwork)502 TestReduceDynamicView(const size_type nwork) { run_test_dynamic_view(nwork); }
503
run_test_dynamic_view(const size_type nwork)504 void run_test_dynamic_view(const size_type nwork) {
505 using functor_type =
506 Test::RuntimeReduceFunctor<ScalarType, execution_space>;
507
508 using result_type = Kokkos::View<ScalarType*, DeviceType>;
509 using result_host_type = typename result_type::HostMirror;
510
511 const unsigned CountLimit = 23;
512
513 const uint64_t nw = nwork;
514 const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
515
516 for (unsigned count = 0; count < CountLimit; ++count) {
517 result_type result("result", count);
518 result_host_type host_result = Kokkos::create_mirror(result);
519
520 // Test result to host pointer:
521
522 std::string str("TestKernelReduce");
523 if (count % 2 == 0) {
524 Kokkos::parallel_reduce(nw, functor_type(nw, count),
525 host_result.data());
526 } else {
527 Kokkos::parallel_reduce(str, nw, functor_type(nw, count),
528 host_result.data());
529 }
530
531 for (unsigned j = 0; j < count; ++j) {
532 const uint64_t correct = 0 == j % 3 ? nw : nsum;
533 ASSERT_EQ(host_result(j), (ScalarType)correct);
534 host_result(j) = 0;
535 }
536 }
537 }
538 };
539
540 } // namespace
541
TEST(TEST_CATEGORY,int64_t_reduce)542 TEST(TEST_CATEGORY, int64_t_reduce) {
543 TestReduce<int64_t, TEST_EXECSPACE>(0);
544 TestReduce<int64_t, TEST_EXECSPACE>(1000000);
545 }
546
TEST(TEST_CATEGORY,double_reduce)547 TEST(TEST_CATEGORY, double_reduce) {
548 TestReduce<double, TEST_EXECSPACE>(0);
549 TestReduce<double, TEST_EXECSPACE>(1000000);
550 }
551
TEST(TEST_CATEGORY,int64_t_reduce_dynamic)552 TEST(TEST_CATEGORY, int64_t_reduce_dynamic) {
553 TestReduceDynamic<int64_t, TEST_EXECSPACE>(0);
554 TestReduceDynamic<int64_t, TEST_EXECSPACE>(1000000);
555 }
556
TEST(TEST_CATEGORY,double_reduce_dynamic)557 TEST(TEST_CATEGORY, double_reduce_dynamic) {
558 TestReduceDynamic<double, TEST_EXECSPACE>(0);
559 TestReduceDynamic<double, TEST_EXECSPACE>(1000000);
560 }
561
TEST(TEST_CATEGORY,int64_t_reduce_dynamic_view)562 TEST(TEST_CATEGORY, int64_t_reduce_dynamic_view) {
563 TestReduceDynamicView<int64_t, TEST_EXECSPACE>(0);
564 TestReduceDynamicView<int64_t, TEST_EXECSPACE>(1000000);
565 }
566
TEST(TEST_CATEGORY,int_combined_reduce)567 TEST(TEST_CATEGORY, int_combined_reduce) {
568 using functor_type = CombinedReduceFunctorSameType<int64_t, TEST_EXECSPACE>;
569 constexpr uint64_t nw = 1000;
570
571 uint64_t nsum = (nw / 2) * (nw + 1);
572
573 int64_t result1 = 0;
574 int64_t result2 = 0;
575 int64_t result3 = 0;
576
577 Kokkos::parallel_reduce("int_combined_reduce",
578 Kokkos::RangePolicy<TEST_EXECSPACE>(0, nw),
579 functor_type(nw), result1, result2, result3);
580
581 ASSERT_EQ(nw, result1);
582 ASSERT_EQ(nsum, result2);
583 ASSERT_EQ(nsum, result3);
584 }
585
TEST(TEST_CATEGORY,mdrange_combined_reduce)586 TEST(TEST_CATEGORY, mdrange_combined_reduce) {
587 using functor_type = CombinedReduceFunctorSameType<int64_t, TEST_EXECSPACE>;
588 constexpr uint64_t nw = 1000;
589
590 uint64_t nsum = (nw / 2) * (nw + 1);
591
592 int64_t result1 = 0;
593 int64_t result2 = 0;
594 int64_t result3 = 0;
595
596 Kokkos::parallel_reduce(
597 "int_combined_reduce_mdrange",
598 Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<3>>({{0, 0, 0}},
599 {{nw, 1, 1}}),
600 functor_type(nw), result1, result2, result3);
601
602 ASSERT_EQ(nw, result1);
603 ASSERT_EQ(nsum, result2);
604 ASSERT_EQ(nsum, result3);
605 }
606
TEST(TEST_CATEGORY,int_combined_reduce_mixed)607 TEST(TEST_CATEGORY, int_combined_reduce_mixed) {
608 using functor_type = CombinedReduceFunctorSameType<int64_t, TEST_EXECSPACE>;
609
610 constexpr uint64_t nw = 1000;
611
612 uint64_t nsum = (nw / 2) * (nw + 1);
613
614 auto result1_v = Kokkos::View<int64_t, Kokkos::HostSpace>{"result1_v"};
615
616 int64_t result2 = 0;
617
618 auto result3_v = Kokkos::View<int64_t, Kokkos::HostSpace>{"result3_v"};
619
620 Kokkos::parallel_reduce("int_combined-reduce_mixed",
621 Kokkos::RangePolicy<TEST_EXECSPACE>(0, nw),
622 functor_type(nw), result1_v, result2,
623 Kokkos::Sum<int64_t, Kokkos::HostSpace>{result3_v});
624
625 ASSERT_EQ(nw, result1_v());
626 ASSERT_EQ(nsum, result2);
627 ASSERT_EQ(nsum, result3_v());
628 }
629 } // namespace Test
630