/*
//@HEADER
// ************************************************************************
//
//                        Kokkos v. 3.0
//       Copyright (2020) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#include <cstdint>
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <limits>

#include <Kokkos_Core.hpp>

52 namespace Test {
53 
54 struct ReducerTag {};
55 
56 template <typename ScalarType, class DeviceType>
57 class ReduceFunctor {
58  public:
59   using execution_space = DeviceType;
60   using size_type       = typename execution_space::size_type;
61 
62   struct value_type {
63     ScalarType value[3];
64   };
65 
66   const size_type nwork;
67 
68   KOKKOS_INLINE_FUNCTION
ReduceFunctor(const size_type & arg_nwork)69   ReduceFunctor(const size_type& arg_nwork) : nwork(arg_nwork) {}
70 
71   KOKKOS_INLINE_FUNCTION
ReduceFunctor(const ReduceFunctor & rhs)72   ReduceFunctor(const ReduceFunctor& rhs) : nwork(rhs.nwork) {}
73 
74   /*
75     KOKKOS_INLINE_FUNCTION
76     void init( value_type & dst ) const
77     {
78       dst.value[0] = 0;
79       dst.value[1] = 0;
80       dst.value[2] = 0;
81     }
82   */
83 
84   KOKKOS_INLINE_FUNCTION
join(volatile value_type & dst,const volatile value_type & src) const85   void join(volatile value_type& dst, const volatile value_type& src) const {
86     dst.value[0] += src.value[0];
87     dst.value[1] += src.value[1];
88     dst.value[2] += src.value[2];
89   }
90 
91   KOKKOS_INLINE_FUNCTION
operator ()(size_type iwork,value_type & dst) const92   void operator()(size_type iwork, value_type& dst) const {
93     dst.value[0] += 1;
94     dst.value[1] += iwork + 1;
95     dst.value[2] += nwork - iwork;
96   }
97 };
98 
99 template <class DeviceType>
100 class ReduceFunctorFinal : public ReduceFunctor<int64_t, DeviceType> {
101  public:
102   using value_type = typename ReduceFunctor<int64_t, DeviceType>::value_type;
103 
104   KOKKOS_INLINE_FUNCTION
ReduceFunctorFinal(const size_t n)105   ReduceFunctorFinal(const size_t n) : ReduceFunctor<int64_t, DeviceType>(n) {}
106 
107   KOKKOS_INLINE_FUNCTION
final(value_type & dst) const108   void final(value_type& dst) const {
109     dst.value[0] = -dst.value[0];
110     dst.value[1] = -dst.value[1];
111     dst.value[2] = -dst.value[2];
112   }
113 };
114 
115 template <class DeviceType>
116 class ReduceFunctorFinalTag {
117  public:
118   using execution_space = DeviceType;
119   using size_type       = typename execution_space::size_type;
120   using ScalarType      = int64_t;
121 
122   struct value_type {
123     ScalarType value[3];
124   };
125 
126   const size_type nwork;
127 
128   KOKKOS_INLINE_FUNCTION
ReduceFunctorFinalTag(const size_type arg_nwork)129   ReduceFunctorFinalTag(const size_type arg_nwork) : nwork(arg_nwork) {}
130 
131   KOKKOS_INLINE_FUNCTION
join(const ReducerTag,volatile value_type & dst,const volatile value_type & src) const132   void join(const ReducerTag, volatile value_type& dst,
133             const volatile value_type& src) const {
134     dst.value[0] += src.value[0];
135     dst.value[1] += src.value[1];
136     dst.value[2] += src.value[2];
137   }
138 
139   KOKKOS_INLINE_FUNCTION
operator ()(const ReducerTag,size_type iwork,value_type & dst) const140   void operator()(const ReducerTag, size_type iwork, value_type& dst) const {
141     dst.value[0] -= 1;
142     dst.value[1] -= iwork + 1;
143     dst.value[2] -= nwork - iwork;
144   }
145 
146   KOKKOS_INLINE_FUNCTION
final(const ReducerTag,value_type & dst) const147   void final(const ReducerTag, value_type& dst) const {
148     ++dst.value[0];
149     ++dst.value[1];
150     ++dst.value[2];
151   }
152 };
153 
154 template <typename ScalarType, class DeviceType>
155 class RuntimeReduceFunctor {
156  public:
157   // Required for functor:
158   using execution_space = DeviceType;
159   using value_type      = ScalarType[];
160   const unsigned value_count;
161 
162   // Unit test details:
163 
164   using size_type = typename execution_space::size_type;
165 
166   const size_type nwork;
167 
RuntimeReduceFunctor(const size_type arg_nwork,const size_type arg_count)168   RuntimeReduceFunctor(const size_type arg_nwork, const size_type arg_count)
169       : value_count(arg_count), nwork(arg_nwork) {}
170 
171   KOKKOS_INLINE_FUNCTION
init(ScalarType dst[]) const172   void init(ScalarType dst[]) const {
173     for (unsigned i = 0; i < value_count; ++i) dst[i] = 0;
174   }
175 
176   KOKKOS_INLINE_FUNCTION
join(volatile ScalarType dst[],const volatile ScalarType src[]) const177   void join(volatile ScalarType dst[], const volatile ScalarType src[]) const {
178     for (unsigned i = 0; i < value_count; ++i) dst[i] += src[i];
179   }
180 
181   KOKKOS_INLINE_FUNCTION
operator ()(size_type iwork,ScalarType dst[]) const182   void operator()(size_type iwork, ScalarType dst[]) const {
183     const size_type tmp[3] = {1, iwork + 1, nwork - iwork};
184 
185     for (size_type i = 0; i < static_cast<size_type>(value_count); ++i) {
186       dst[i] += tmp[i % 3];
187     }
188   }
189 };
190 
191 template <typename ScalarType, class DeviceType>
192 class RuntimeReduceMinMax {
193  public:
194   // Required for functor:
195   using execution_space = DeviceType;
196   using value_type      = ScalarType[];
197   const unsigned value_count;
198 
199   // Unit test details:
200 
201   using size_type = typename execution_space::size_type;
202 
203   const size_type nwork;
204   const ScalarType amin;
205   const ScalarType amax;
206 
RuntimeReduceMinMax(const size_type arg_nwork,const size_type arg_count)207   RuntimeReduceMinMax(const size_type arg_nwork, const size_type arg_count)
208       : value_count(arg_count),
209         nwork(arg_nwork),
210         amin(std::numeric_limits<ScalarType>::min()),
211         amax(std::numeric_limits<ScalarType>::max()) {}
212 
213   KOKKOS_INLINE_FUNCTION
init(ScalarType dst[]) const214   void init(ScalarType dst[]) const {
215     for (unsigned i = 0; i < value_count; ++i) {
216       dst[i] = i % 2 ? amax : amin;
217     }
218   }
219 
220   KOKKOS_INLINE_FUNCTION
join(volatile ScalarType dst[],const volatile ScalarType src[]) const221   void join(volatile ScalarType dst[], const volatile ScalarType src[]) const {
222     for (unsigned i = 0; i < value_count; ++i) {
223       dst[i] = i % 2 ? (dst[i] < src[i] ? dst[i] : src[i])   // min
224                      : (dst[i] > src[i] ? dst[i] : src[i]);  // max
225     }
226   }
227 
228   KOKKOS_INLINE_FUNCTION
operator ()(size_type iwork,ScalarType dst[]) const229   void operator()(size_type iwork, ScalarType dst[]) const {
230     const ScalarType tmp[2] = {ScalarType(iwork + 1),
231                                ScalarType(nwork - iwork)};
232 
233     for (size_type i = 0; i < static_cast<size_type>(value_count); ++i) {
234       dst[i] = i % 2 ? (dst[i] < tmp[i % 2] ? dst[i] : tmp[i % 2])
235                      : (dst[i] > tmp[i % 2] ? dst[i] : tmp[i % 2]);
236     }
237   }
238 };
239 
240 template <class DeviceType>
241 class RuntimeReduceFunctorFinal
242     : public RuntimeReduceFunctor<int64_t, DeviceType> {
243  public:
244   using base_type   = RuntimeReduceFunctor<int64_t, DeviceType>;
245   using value_type  = typename base_type::value_type;
246   using scalar_type = int64_t;
247 
RuntimeReduceFunctorFinal(const size_t theNwork,const size_t count)248   RuntimeReduceFunctorFinal(const size_t theNwork, const size_t count)
249       : base_type(theNwork, count) {}
250 
251   KOKKOS_INLINE_FUNCTION
final(value_type dst) const252   void final(value_type dst) const {
253     for (unsigned i = 0; i < base_type::value_count; ++i) {
254       dst[i] = -dst[i];
255     }
256   }
257 };
258 
259 template <class ValueType, class DeviceType>
260 class CombinedReduceFunctorSameType {
261  public:
262   using execution_space = typename DeviceType::execution_space;
263   using size_type       = typename execution_space::size_type;
264 
265   const size_type nwork;
266 
267   KOKKOS_INLINE_FUNCTION
CombinedReduceFunctorSameType(const size_type & arg_nwork)268   constexpr explicit CombinedReduceFunctorSameType(const size_type& arg_nwork)
269       : nwork(arg_nwork) {}
270 
271   KOKKOS_DEFAULTED_FUNCTION
272   constexpr CombinedReduceFunctorSameType(
273       const CombinedReduceFunctorSameType& rhs) = default;
274 
275   KOKKOS_INLINE_FUNCTION
operator ()(size_type iwork,ValueType & dst1,ValueType & dst2,ValueType & dst3) const276   void operator()(size_type iwork, ValueType& dst1, ValueType& dst2,
277                   ValueType& dst3) const {
278     dst1 += 1;
279     dst2 += iwork + 1;
280     dst3 += nwork - iwork;
281   }
282 
283   KOKKOS_INLINE_FUNCTION
operator ()(size_type iwork,size_type always_zero_1,size_type always_zero_2,ValueType & dst1,ValueType & dst2,ValueType & dst3) const284   void operator()(size_type iwork, size_type always_zero_1,
285                   size_type always_zero_2, ValueType& dst1, ValueType& dst2,
286                   ValueType& dst3) const {
287     dst1 += 1 + always_zero_1;
288     dst2 += iwork + 1 + always_zero_2;
289     dst3 += nwork - iwork;
290   }
291 };
292 
293 namespace {
294 
295 template <typename ScalarType, class DeviceType>
296 class TestReduce {
297  public:
298   using execution_space = DeviceType;
299   using size_type       = typename execution_space::size_type;
300 
TestReduce(const size_type & nwork)301   TestReduce(const size_type& nwork) {
302     run_test(nwork);
303     run_test_final(nwork);
304     run_test_final_tag(nwork);
305   }
306 
run_test(const size_type & nwork)307   void run_test(const size_type& nwork) {
308     using functor_type = Test::ReduceFunctor<ScalarType, execution_space>;
309     using value_type   = typename functor_type::value_type;
310 
311     enum { Count = 3 };
312     enum { Repeat = 100 };
313 
314     value_type result[Repeat];
315 
316     const uint64_t nw   = nwork;
317     const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
318 
319     for (unsigned i = 0; i < Repeat; ++i) {
320       Kokkos::parallel_reduce(nwork, functor_type(nwork), result[i]);
321     }
322 
323     for (unsigned i = 0; i < Repeat; ++i) {
324       for (unsigned j = 0; j < Count; ++j) {
325         const uint64_t correct = 0 == j % 3 ? nw : nsum;
326         ASSERT_EQ((ScalarType)correct, result[i].value[j]);
327       }
328     }
329   }
330 
run_test_final(const size_type & nwork)331   void run_test_final(const size_type& nwork) {
332     using functor_type = Test::ReduceFunctorFinal<execution_space>;
333     using value_type   = typename functor_type::value_type;
334 
335     enum { Count = 3 };
336     enum { Repeat = 100 };
337 
338     value_type result[Repeat];
339 
340     const uint64_t nw   = nwork;
341     const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
342 
343     for (unsigned i = 0; i < Repeat; ++i) {
344       if (i % 2 == 0) {
345         Kokkos::parallel_reduce(nwork, functor_type(nwork), result[i]);
346       } else {
347         Kokkos::parallel_reduce("Reduce", nwork, functor_type(nwork),
348                                 result[i]);
349       }
350     }
351 
352     for (unsigned i = 0; i < Repeat; ++i) {
353       for (unsigned j = 0; j < Count; ++j) {
354         const uint64_t correct = 0 == j % 3 ? nw : nsum;
355         ASSERT_EQ((ScalarType)correct, -result[i].value[j]);
356       }
357     }
358   }
359 
run_test_final_tag(const size_type & nwork)360   void run_test_final_tag(const size_type& nwork) {
361     using functor_type = Test::ReduceFunctorFinalTag<execution_space>;
362     using value_type   = typename functor_type::value_type;
363 
364     enum { Count = 3 };
365     enum { Repeat = 100 };
366 
367     value_type result[Repeat];
368 
369     const uint64_t nw   = nwork;
370     const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
371 
372     for (unsigned i = 0; i < Repeat; ++i) {
373       if (i % 2 == 0) {
374         Kokkos::parallel_reduce(
375             Kokkos::RangePolicy<execution_space, ReducerTag>(0, nwork),
376             functor_type(nwork), result[i]);
377       } else {
378         Kokkos::parallel_reduce(
379             "Reduce",
380             Kokkos::RangePolicy<execution_space, ReducerTag>(0, nwork),
381             functor_type(nwork), result[i]);
382       }
383     }
384 
385     for (unsigned i = 0; i < Repeat; ++i) {
386       for (unsigned j = 0; j < Count; ++j) {
387         const uint64_t correct = 0 == j % 3 ? nw : nsum;
388         ASSERT_EQ((ScalarType)correct, 1 - result[i].value[j]);
389       }
390     }
391   }
392 };
393 
394 template <typename ScalarType, class DeviceType>
395 class TestReduceDynamic {
396  public:
397   using execution_space = DeviceType;
398   using size_type       = typename execution_space::size_type;
399 
TestReduceDynamic(const size_type nwork)400   TestReduceDynamic(const size_type nwork) {
401     run_test_dynamic(nwork);
402     run_test_dynamic_minmax(nwork);
403     run_test_dynamic_final(nwork);
404   }
405 
run_test_dynamic(const size_type nwork)406   void run_test_dynamic(const size_type nwork) {
407     using functor_type =
408         Test::RuntimeReduceFunctor<ScalarType, execution_space>;
409 
410     enum { Count = 3 };
411     enum { Repeat = 100 };
412 
413     ScalarType result[Repeat][Count];
414 
415     const uint64_t nw   = nwork;
416     const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
417 
418     for (unsigned i = 0; i < Repeat; ++i) {
419       if (i % 2 == 0) {
420         Kokkos::parallel_reduce(nwork, functor_type(nwork, Count), result[i]);
421       } else {
422         Kokkos::parallel_reduce("Reduce", nwork, functor_type(nwork, Count),
423                                 result[i]);
424       }
425     }
426 
427     for (unsigned i = 0; i < Repeat; ++i) {
428       for (unsigned j = 0; j < Count; ++j) {
429         const uint64_t correct = 0 == j % 3 ? nw : nsum;
430         ASSERT_EQ((ScalarType)correct, result[i][j]);
431       }
432     }
433   }
434 
run_test_dynamic_minmax(const size_type nwork)435   void run_test_dynamic_minmax(const size_type nwork) {
436     using functor_type = Test::RuntimeReduceMinMax<ScalarType, execution_space>;
437 
438     enum { Count = 2 };
439     enum { Repeat = 100 };
440 
441     ScalarType result[Repeat][Count];
442 
443     for (unsigned i = 0; i < Repeat; ++i) {
444       if (i % 2 == 0) {
445         Kokkos::parallel_reduce(nwork, functor_type(nwork, Count), result[i]);
446       } else {
447         Kokkos::parallel_reduce("Reduce", nwork, functor_type(nwork, Count),
448                                 result[i]);
449       }
450     }
451 
452     for (unsigned i = 0; i < Repeat; ++i) {
453       for (unsigned j = 0; j < Count; ++j) {
454         if (nwork == 0) {
455           ScalarType amin(std::numeric_limits<ScalarType>::min());
456           ScalarType amax(std::numeric_limits<ScalarType>::max());
457           const ScalarType correct = (j % 2) ? amax : amin;
458           ASSERT_EQ((ScalarType)correct, result[i][j]);
459         } else {
460           const uint64_t correct = j % 2 ? 1 : nwork;
461           ASSERT_EQ((ScalarType)correct, result[i][j]);
462         }
463       }
464     }
465   }
466 
run_test_dynamic_final(const size_type nwork)467   void run_test_dynamic_final(const size_type nwork) {
468     using functor_type = Test::RuntimeReduceFunctorFinal<execution_space>;
469 
470     enum { Count = 3 };
471     enum { Repeat = 100 };
472 
473     typename functor_type::scalar_type result[Repeat][Count];
474 
475     const uint64_t nw   = nwork;
476     const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
477 
478     for (unsigned i = 0; i < Repeat; ++i) {
479       if (i % 2 == 0) {
480         Kokkos::parallel_reduce(nwork, functor_type(nwork, Count), result[i]);
481       } else {
482         Kokkos::parallel_reduce("TestKernelReduce", nwork,
483                                 functor_type(nwork, Count), result[i]);
484       }
485     }
486 
487     for (unsigned i = 0; i < Repeat; ++i) {
488       for (unsigned j = 0; j < Count; ++j) {
489         const uint64_t correct = 0 == j % 3 ? nw : nsum;
490         ASSERT_EQ((ScalarType)correct, -result[i][j]);
491       }
492     }
493   }
494 };
495 
496 template <typename ScalarType, class DeviceType>
497 class TestReduceDynamicView {
498  public:
499   using execution_space = DeviceType;
500   using size_type       = typename execution_space::size_type;
501 
TestReduceDynamicView(const size_type nwork)502   TestReduceDynamicView(const size_type nwork) { run_test_dynamic_view(nwork); }
503 
run_test_dynamic_view(const size_type nwork)504   void run_test_dynamic_view(const size_type nwork) {
505     using functor_type =
506         Test::RuntimeReduceFunctor<ScalarType, execution_space>;
507 
508     using result_type      = Kokkos::View<ScalarType*, DeviceType>;
509     using result_host_type = typename result_type::HostMirror;
510 
511     const unsigned CountLimit = 23;
512 
513     const uint64_t nw   = nwork;
514     const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1);
515 
516     for (unsigned count = 0; count < CountLimit; ++count) {
517       result_type result("result", count);
518       result_host_type host_result = Kokkos::create_mirror(result);
519 
520       // Test result to host pointer:
521 
522       std::string str("TestKernelReduce");
523       if (count % 2 == 0) {
524         Kokkos::parallel_reduce(nw, functor_type(nw, count),
525                                 host_result.data());
526       } else {
527         Kokkos::parallel_reduce(str, nw, functor_type(nw, count),
528                                 host_result.data());
529       }
530 
531       for (unsigned j = 0; j < count; ++j) {
532         const uint64_t correct = 0 == j % 3 ? nw : nsum;
533         ASSERT_EQ(host_result(j), (ScalarType)correct);
534         host_result(j) = 0;
535       }
536     }
537   }
538 };
539 
540 }  // namespace
541 
// Fixed-count reductions over int64_t, for an empty and a large range.
TEST(TEST_CATEGORY, int64_t_reduce) {
  TestReduce<int64_t, TEST_EXECSPACE>(0);
  TestReduce<int64_t, TEST_EXECSPACE>(1000000);
}

// Fixed-count reductions over double, for an empty and a large range.
TEST(TEST_CATEGORY, double_reduce) {
  TestReduce<double, TEST_EXECSPACE>(0);
  TestReduce<double, TEST_EXECSPACE>(1000000);
}

// Run-time sized reductions over int64_t, empty and large range.
TEST(TEST_CATEGORY, int64_t_reduce_dynamic) {
  TestReduceDynamic<int64_t, TEST_EXECSPACE>(0);
  TestReduceDynamic<int64_t, TEST_EXECSPACE>(1000000);
}

// Run-time sized reductions over double, empty and large range.
TEST(TEST_CATEGORY, double_reduce_dynamic) {
  TestReduceDynamic<double, TEST_EXECSPACE>(0);
  TestReduceDynamic<double, TEST_EXECSPACE>(1000000);
}

// Run-time sized reductions into a host-mirror view pointer.
TEST(TEST_CATEGORY, int64_t_reduce_dynamic_view) {
  TestReduceDynamicView<int64_t, TEST_EXECSPACE>(0);
  TestReduceDynamicView<int64_t, TEST_EXECSPACE>(1000000);
}

// Combined reducers: one parallel_reduce producing three scalar sums.
TEST(TEST_CATEGORY, int_combined_reduce) {
  using functor_type = CombinedReduceFunctorSameType<int64_t, TEST_EXECSPACE>;
  constexpr uint64_t nw = 1000;

  uint64_t nsum = (nw / 2) * (nw + 1);  // sum 1..nw

  int64_t result1 = 0;
  int64_t result2 = 0;
  int64_t result3 = 0;

  Kokkos::parallel_reduce("int_combined_reduce",
                          Kokkos::RangePolicy<TEST_EXECSPACE>(0, nw),
                          functor_type(nw), result1, result2, result3);

  ASSERT_EQ(nw, result1);
  ASSERT_EQ(nsum, result2);
  ASSERT_EQ(nsum, result3);
}

// Combined reducers through an MDRangePolicy; the two trailing extents
// are 1 so only the first index varies.
TEST(TEST_CATEGORY, mdrange_combined_reduce) {
  using functor_type = CombinedReduceFunctorSameType<int64_t, TEST_EXECSPACE>;
  constexpr uint64_t nw = 1000;

  uint64_t nsum = (nw / 2) * (nw + 1);  // sum 1..nw

  int64_t result1 = 0;
  int64_t result2 = 0;
  int64_t result3 = 0;

  Kokkos::parallel_reduce(
      "int_combined_reduce_mdrange",
      Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<3>>({{0, 0, 0}},
                                                             {{nw, 1, 1}}),
      functor_type(nw), result1, result2, result3);

  ASSERT_EQ(nw, result1);
  ASSERT_EQ(nsum, result2);
  ASSERT_EQ(nsum, result3);
}

// Combined reducers mixing result kinds: a View, a plain scalar, and an
// explicit Kokkos::Sum reducer, all in one parallel_reduce call.
TEST(TEST_CATEGORY, int_combined_reduce_mixed) {
  using functor_type = CombinedReduceFunctorSameType<int64_t, TEST_EXECSPACE>;

  constexpr uint64_t nw = 1000;

  uint64_t nsum = (nw / 2) * (nw + 1);  // sum 1..nw

  auto result1_v = Kokkos::View<int64_t, Kokkos::HostSpace>{"result1_v"};

  int64_t result2 = 0;

  auto result3_v = Kokkos::View<int64_t, Kokkos::HostSpace>{"result3_v"};

  Kokkos::parallel_reduce("int_combined-reduce_mixed",
                          Kokkos::RangePolicy<TEST_EXECSPACE>(0, nw),
                          functor_type(nw), result1_v, result2,
                          Kokkos::Sum<int64_t, Kokkos::HostSpace>{result3_v});

  ASSERT_EQ(nw, result1_v());
  ASSERT_EQ(nsum, result2);
  ASSERT_EQ(nsum, result3_v());
}
}  // namespace Test