#define THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER

#include <thrust/detail/config.h>

#if THRUST_CPP_DIALECT >= 2011 && !defined(THRUST_LEGACY_GCC)

#include <unittest/unittest.h>
#include <unittest/util_async.h>

#include <thrust/async/reduce.h>
#include <thrust/async/copy.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>

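// These tests exercise `thrust::async::reduce`, which launches a reduction
// and returns a future instead of a value. A minimal sketch of the pattern
// under test (assuming a populated `thrust::device_vector<int> d`):
//
//   auto fut = thrust::async::reduce(thrust::device, d.begin(), d.end());
//   // ... other host work may overlap with the device reduction ...
//   int sum = fut.get(); // Blocks until the reduction completes.
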
template <typename T>
struct custom_plus
{
  __host__ __device__
  T operator()(T lhs, T rhs) const
  {
    return lhs + rhs;
  }
};
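
// `custom_plus` is used below to verify that `thrust::async::reduce` accepts
// user-defined binary operators, not just functors that thrust ships such as
// `thrust::plus`.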

#define DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(                                 \
    NAME, MEMBERS, CTOR, DTOR, VALIDATE, ...                                  \
  )                                                                           \
  template <typename T>                                                       \
  struct NAME                                                                 \
  {                                                                           \
    MEMBERS                                                                   \
                                                                              \
    NAME() { CTOR }                                                           \
                                                                              \
    ~NAME() { DTOR }                                                          \
                                                                              \
    template <typename Event>                                                 \
    void validate_event(Event& e)                                             \
    {                                                                         \
      THRUST_UNUSED_VAR(e);                                                   \
      VALIDATE                                                                \
    }                                                                         \
                                                                              \
    template <                                                                \
      typename ForwardIt, typename Sentinel                                   \
    >                                                                         \
    __host__                                                                  \
    auto operator()(                                                          \
      ForwardIt&& first, Sentinel&& last                                      \
    )                                                                         \
    THRUST_DECLTYPE_RETURNS(                                                  \
      ::thrust::async::reduce(                                                \
        __VA_ARGS__                                                           \
      )                                                                       \
    )                                                                         \
  };                                                                          \
  /**/

#define DEFINE_ASYNC_REDUCE_INVOKER(NAME, ...)                                \
  DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(                                       \
    NAME                                                                      \
  , THRUST_PP_EMPTY(), THRUST_PP_EMPTY(), THRUST_PP_EMPTY(), THRUST_PP_EMPTY()\
  , __VA_ARGS__                                                               \
  )                                                                           \
  /**/
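
// For reference, a rough sketch of what
// `DEFINE_ASYNC_REDUCE_INVOKER(reduce_async_invoker, THRUST_FWD(first), THRUST_FWD(last))`
// expands to (members, constructor, destructor, and the `validate_event` body
// are all empty in the non-stateful case):
//
//   template <typename T>
//   struct reduce_async_invoker
//   {
//     template <typename ForwardIt, typename Sentinel>
//     __host__
//     auto operator()(ForwardIt&& first, Sentinel&& last)
//     THRUST_DECLTYPE_RETURNS(
//       ::thrust::async::reduce(THRUST_FWD(first), THRUST_FWD(last))
//     )
//   };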

#define DEFINE_SYNC_REDUCE_INVOKER(NAME, ...)                                 \
  template <typename T>                                                       \
  struct NAME                                                                 \
  {                                                                           \
                                                                              \
    template <                                                                \
      typename ForwardIt, typename Sentinel                                   \
    >                                                                         \
    __host__                                                                  \
    auto operator()(                                                          \
      ForwardIt&& first, Sentinel&& last                                      \
    )                                                                         \
    THRUST_DECLTYPE_RETURNS(                                                  \
      ::thrust::reduce(                                                       \
        __VA_ARGS__                                                           \
      )                                                                       \
    )                                                                         \
  };                                                                          \
  /**/

DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker
, THRUST_FWD(first), THRUST_FWD(last)
);
DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device
, thrust::device
, THRUST_FWD(first), THRUST_FWD(last)
);
DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_allocator
, thrust::device(thrust::device_allocator<void>{})
, THRUST_FWD(first), THRUST_FWD(last)
);
DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_on
  // Members.
, cudaStream_t stream_;
  // Constructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)
  );
  // Destructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamDestroy(stream_)
  );
  // `validate_event` member.
, ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle());
  // Arguments to `thrust::async::reduce`.
, thrust::device.on(stream_)
, THRUST_FWD(first), THRUST_FWD(last)
);
DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_allocator_on
  // Members.
, cudaStream_t stream_;
  // Constructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)
  );
  // Destructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamDestroy(stream_)
  );
  // `validate_event` member.
, ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle());
  // Arguments to `thrust::async::reduce`.
, thrust::device(thrust::device_allocator<void>{}).on(stream_)
, THRUST_FWD(first), THRUST_FWD(last)
);

DEFINE_SYNC_REDUCE_INVOKER(
  reduce_sync_invoker
, THRUST_FWD(first), THRUST_FWD(last)
);

DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_init
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
);
DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_init
, thrust::device
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
);
DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_allocator_init
, thrust::device(thrust::device_allocator<void>{})
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
);
DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_on_init
  // Members.
, cudaStream_t stream_;
  // Constructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)
  );
  // Destructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamDestroy(stream_)
  );
  // `validate_event` member.
, ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle());
  // Arguments to `thrust::async::reduce`.
, thrust::device.on(stream_)
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
);
DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_allocator_on_init
  // Members.
, cudaStream_t stream_;
  // Constructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)
  );
  // Destructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamDestroy(stream_)
  );
  // `validate_event` member.
, ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle());
  // Arguments to `thrust::async::reduce`.
, thrust::device(thrust::device_allocator<void>{}).on(stream_)
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
);

DEFINE_SYNC_REDUCE_INVOKER(
  reduce_sync_invoker_init
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
);

DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_init_plus
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, thrust::plus<T>()
);
DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_init_plus
, thrust::device
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, thrust::plus<T>()
);
DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_allocator_init_plus
, thrust::device(thrust::device_allocator<void>{})
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, thrust::plus<T>()
);
DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_on_init_plus
  // Members.
, cudaStream_t stream_;
  // Constructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)
  );
  // Destructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamDestroy(stream_)
  );
  // `validate_event` member.
, ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle());
  // Arguments to `thrust::async::reduce`.
, thrust::device.on(stream_)
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, thrust::plus<T>()
);
DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_allocator_on_init_plus
  // Members.
, cudaStream_t stream_;
  // Constructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)
  );
  // Destructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamDestroy(stream_)
  );
  // `validate_event` member.
, ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle());
  // Arguments to `thrust::async::reduce`.
, thrust::device(thrust::device_allocator<void>{}).on(stream_)
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, thrust::plus<T>()
);

DEFINE_SYNC_REDUCE_INVOKER(
  reduce_sync_invoker_init_plus
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, thrust::plus<T>()
);

DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_init_custom_plus
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, custom_plus<T>()
);
DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_init_custom_plus
, thrust::device
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, custom_plus<T>()
);
DEFINE_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_allocator_init_custom_plus
, thrust::device(thrust::device_allocator<void>{})
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, custom_plus<T>()
);
DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_on_init_custom_plus
  // Members.
, cudaStream_t stream_;
  // Constructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)
  );
  // Destructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamDestroy(stream_)
  );
  // `validate_event` member.
, ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle());
  // Arguments to `thrust::async::reduce`.
, thrust::device.on(stream_)
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, custom_plus<T>()
);
DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER(
  reduce_async_invoker_device_allocator_on_init_custom_plus
  // Members.
, cudaStream_t stream_;
  // Constructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)
  );
  // Destructor.
, thrust::cuda_cub::throw_on_error(
    cudaStreamDestroy(stream_)
  );
  // `validate_event` member.
, ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle());
  // Arguments to `thrust::async::reduce`.
, thrust::device(thrust::device_allocator<void>{}).on(stream_)
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, custom_plus<T>()
);

DEFINE_SYNC_REDUCE_INVOKER(
  reduce_sync_invoker_init_custom_plus
, THRUST_FWD(first), THRUST_FWD(last)
, unittest::random_integer<T>()
, custom_plus<T>()
);
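
// Together, the invokers above cover the cross product of execution policies
// ({none, thrust::device, thrust::device with a device_allocator, .on(stream),
// allocator + .on(stream)}) and argument forms ({no init, init,
// init + thrust::plus, init + custom_plus}).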

///////////////////////////////////////////////////////////////////////////////

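// Drives one async/sync invoker pair: launch four identical async reductions,
// validate their events, compute a reference result synchronously on the
// host, then compare it against each future's value.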
template <
  template <typename> class AsyncReduceInvoker
, template <typename> class SyncReduceInvoker
>
struct test_async_reduce
{
  template <typename T>
  struct tester
  {
    __host__
    void operator()(std::size_t n)
    {
      thrust::host_vector<T>   h0(unittest::random_integers<T>(n));
      thrust::device_vector<T> d0a(h0);
      thrust::device_vector<T> d0b(h0);
      thrust::device_vector<T> d0c(h0);
      thrust::device_vector<T> d0d(h0);

      AsyncReduceInvoker<T> invoke_async;
      SyncReduceInvoker<T>  invoke_sync;

      ASSERT_EQUAL(h0, d0a);
      ASSERT_EQUAL(h0, d0b);
      ASSERT_EQUAL(h0, d0c);
      ASSERT_EQUAL(h0, d0d);

      auto f0a = invoke_async(d0a.begin(), d0a.end());
      auto f0b = invoke_async(d0b.begin(), d0b.end());
      auto f0c = invoke_async(d0c.begin(), d0c.end());
      auto f0d = invoke_async(d0d.begin(), d0d.end());

      invoke_async.validate_event(f0a);
      invoke_async.validate_event(f0b);
      invoke_async.validate_event(f0c);
      invoke_async.validate_event(f0d);

      // This potentially runs concurrently with the asynchronous reductions.
      auto const r0 = invoke_sync(h0.begin(), h0.end());

      auto const r1a = TEST_FUTURE_VALUE_RETRIEVAL(f0a);
      auto const r1b = TEST_FUTURE_VALUE_RETRIEVAL(f0b);
      auto const r1c = TEST_FUTURE_VALUE_RETRIEVAL(f0c);
      auto const r1d = TEST_FUTURE_VALUE_RETRIEVAL(f0d);

      ASSERT_EQUAL(r0, r1a);
      ASSERT_EQUAL(r0, r1b);
      ASSERT_EQUAL(r0, r1c);
      ASSERT_EQUAL(r0, r1d);
    }
  };
};
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker
    , reduce_sync_invoker
    >::tester
  )
, NumericTypes
, test_async_reduce
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device
    , reduce_sync_invoker
    >::tester
  )
, NumericTypes
, test_async_reduce_policy
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_allocator
    , reduce_sync_invoker
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_allocator
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_on
    , reduce_sync_invoker
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_on
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_allocator_on
    , reduce_sync_invoker
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_allocator_on
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_init
    , reduce_sync_invoker_init
    >::tester
  )
, NumericTypes
, test_async_reduce_init
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_init
    , reduce_sync_invoker_init
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_init
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_allocator_init
    , reduce_sync_invoker_init
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_allocator_init
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_on_init
    , reduce_sync_invoker_init
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_on_init
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_allocator_on_init
    , reduce_sync_invoker_init
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_allocator_on_init
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_init_plus
    , reduce_sync_invoker_init_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_init_plus
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_init_plus
    , reduce_sync_invoker_init_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_init_plus
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_allocator_init_plus
    , reduce_sync_invoker_init_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_allocator_init_plus
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_on_init_plus
    , reduce_sync_invoker_init_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_on_init_plus
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_allocator_on_init_plus
    , reduce_sync_invoker_init_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_allocator_on_init_plus
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_init_custom_plus
    , reduce_sync_invoker_init_custom_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_init_custom_plus
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_init_custom_plus
    , reduce_sync_invoker_init_custom_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_init_custom_plus
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_allocator_init_custom_plus
    , reduce_sync_invoker_init_custom_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_allocator_init_custom_plus
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_on_init_custom_plus
    , reduce_sync_invoker_init_custom_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_on_init_custom_plus
);
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce<
      reduce_async_invoker_device_allocator_on_init_custom_plus
    , reduce_sync_invoker_init_custom_plus
    >::tester
  )
, NumericTypes
, test_async_reduce_policy_allocator_on_init_custom_plus
);

///////////////////////////////////////////////////////////////////////////////

template <
  template <typename> class AsyncReduceInvoker
, template <typename> class SyncReduceInvoker
>
struct test_async_reduce_counting_iterator
{
  template <typename T>
  struct tester
  {
    __host__
    void operator()()
    {
      constexpr std::size_t n = 15 * sizeof(T);

      ASSERT_LEQUAL(T(n), unittest::truncate_to_max_representable<T>(n));

      thrust::counting_iterator<T> first(0);
      thrust::counting_iterator<T> last(n);

      AsyncReduceInvoker<T> invoke_async;
      SyncReduceInvoker<T>  invoke_sync;

      auto f0a = invoke_async(first, last);
      auto f0b = invoke_async(first, last);
      auto f0c = invoke_async(first, last);
      auto f0d = invoke_async(first, last);

      invoke_async.validate_event(f0a);
      invoke_async.validate_event(f0b);
      invoke_async.validate_event(f0c);
      invoke_async.validate_event(f0d);

      // This potentially runs concurrently with the asynchronous reductions.
      auto const r0 = invoke_sync(first, last);

      auto const r1a = TEST_FUTURE_VALUE_RETRIEVAL(f0a);
      auto const r1b = TEST_FUTURE_VALUE_RETRIEVAL(f0b);
      auto const r1c = TEST_FUTURE_VALUE_RETRIEVAL(f0c);
      auto const r1d = TEST_FUTURE_VALUE_RETRIEVAL(f0d);

      ASSERT_EQUAL(r0, r1a);
      ASSERT_EQUAL(r0, r1b);
      ASSERT_EQUAL(r0, r1c);
      ASSERT_EQUAL(r0, r1d);
    }
  };
};
DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce_counting_iterator<
      reduce_async_invoker
    , reduce_sync_invoker
    >::tester
  )
, BuiltinNumericTypes
, test_async_reduce_counting_iterator
);
DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce_counting_iterator<
      reduce_async_invoker_device
    , reduce_sync_invoker
    >::tester
  )
, BuiltinNumericTypes
, test_async_reduce_policy_counting_iterator
);
DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce_counting_iterator<
      reduce_async_invoker_init
    , reduce_sync_invoker_init
    >::tester
  )
, BuiltinNumericTypes
, test_async_reduce_counting_iterator_init
);
DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce_counting_iterator<
      reduce_async_invoker_device_init
    , reduce_sync_invoker_init
    >::tester
  )
, BuiltinNumericTypes
, test_async_reduce_policy_counting_iterator_init
);
DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce_counting_iterator<
      reduce_async_invoker_init_plus
    , reduce_sync_invoker_init_plus
    >::tester
  )
, BuiltinNumericTypes
, test_async_reduce_counting_iterator_init_plus
);
DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce_counting_iterator<
      reduce_async_invoker_device_init_plus
    , reduce_sync_invoker_init_plus
    >::tester
  )
, BuiltinNumericTypes
, test_async_reduce_policy_counting_iterator_init_plus
);
DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce_counting_iterator<
      reduce_async_invoker_init_custom_plus
    , reduce_sync_invoker_init_custom_plus
    >::tester
  )
, BuiltinNumericTypes
, test_async_reduce_counting_iterator_init_custom_plus
);
DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME(
  THRUST_PP_EXPAND_ARGS(
    test_async_reduce_counting_iterator<
      reduce_async_invoker_device_init_custom_plus
    , reduce_sync_invoker_init_custom_plus
    >::tester
  )
, BuiltinNumericTypes
, test_async_reduce_policy_counting_iterator_init_custom_plus
);

///////////////////////////////////////////////////////////////////////////////

template <typename T>
struct test_async_reduce_using
{
  __host__
  void operator()(std::size_t n)
  {
    thrust::host_vector<T>   h0(unittest::random_integers<T>(n));
    thrust::device_vector<T> d0a(h0);
    thrust::device_vector<T> d0b(h0);

    ASSERT_EQUAL(h0, d0a);
    ASSERT_EQUAL(h0, d0b);

    thrust::device_future<T> f0a;
    thrust::device_future<T> f0b;

    // When the async customization points are brought into scope, they should
    // be selected instead of the synchronous algorithms.
    {
      using namespace thrust::async;
      f0a = reduce(d0a.begin(), d0a.end());
    }
    {
      using thrust::async::reduce;
      f0b = reduce(d0b.begin(), d0b.end());
    }

    // ADL should find the synchronous algorithms.
    // This potentially runs concurrently with the asynchronous reductions.
    T const r0 = reduce(h0.begin(), h0.end());

    T const r1a = TEST_FUTURE_VALUE_RETRIEVAL(f0a);
    T const r1b = TEST_FUTURE_VALUE_RETRIEVAL(f0b);

    ASSERT_EQUAL(r0, r1a);
    ASSERT_EQUAL(r0, r1b);
  }
};
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES(
  test_async_reduce_using
, NumericTypes
);

///////////////////////////////////////////////////////////////////////////////

template <typename T>
struct test_async_reduce_after
{
  __host__
  void operator()(std::size_t n)
  {
    thrust::host_vector<T>   h0(unittest::random_integers<T>(n));
    thrust::device_vector<T> d0(h0);

    ASSERT_EQUAL(h0, d0);

    auto f0 = thrust::async::reduce(
      d0.begin(), d0.end()
    );

    ASSERT_EQUAL(true, f0.valid_stream());

    auto const f0_stream = f0.stream().native_handle();

    auto f1 = thrust::async::reduce(
      thrust::device.after(f0), d0.begin(), d0.end()
    );

    // Verify that double consumption of a future produces an exception.
    ASSERT_THROWS_EQUAL(
      auto x = thrust::async::reduce(
        thrust::device.after(f0), d0.begin(), d0.end()
      );
      THRUST_UNUSED_VAR(x)
    , thrust::event_error
    , thrust::event_error(thrust::event_errc::no_state)
    );

    ASSERT_EQUAL_QUIET(f0_stream, f1.stream().native_handle());

    auto after_policy2 = thrust::device.after(f1);

    auto f2 = thrust::async::reduce(
      after_policy2, d0.begin(), d0.end()
    );

    // Verify that double consumption of a policy produces an exception.
    ASSERT_THROWS_EQUAL(
      auto x = thrust::async::reduce(
        after_policy2, d0.begin(), d0.end()
      );
      THRUST_UNUSED_VAR(x)
    , thrust::event_error
    , thrust::event_error(thrust::event_errc::no_state)
    );

    ASSERT_EQUAL_QUIET(f0_stream, f2.stream().native_handle());

    // This potentially runs concurrently with the asynchronous reductions.
    T const r0 = thrust::reduce(h0.begin(), h0.end());

    T const r1 = TEST_FUTURE_VALUE_RETRIEVAL(f2);

    ASSERT_EQUAL(r0, r1);
  }
};
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES(
  test_async_reduce_after
, NumericTypes
);

///////////////////////////////////////////////////////////////////////////////

template <typename T>
struct test_async_reduce_on_then_after
{
  __host__
  void operator()(std::size_t n)
  {
    thrust::host_vector<T>   h0(unittest::random_integers<T>(n));
    thrust::device_vector<T> d0(h0);

    ASSERT_EQUAL(h0, d0);

    cudaStream_t stream;
    thrust::cuda_cub::throw_on_error(
      cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)
    );

    auto f0 = thrust::async::reduce(
      thrust::device.on(stream), d0.begin(), d0.end()
    );

    ASSERT_EQUAL_QUIET(stream, f0.stream().native_handle());

    auto f1 = thrust::async::reduce(
      thrust::device.after(f0), d0.begin(), d0.end()
    );

    // Verify that double consumption of a future produces an exception.
    ASSERT_THROWS_EQUAL(
      auto x = thrust::async::reduce(
        thrust::device.after(f0), d0.begin(), d0.end()
      );
      THRUST_UNUSED_VAR(x)
    , thrust::event_error
    , thrust::event_error(thrust::event_errc::no_state)
    );

    ASSERT_EQUAL_QUIET(stream, f1.stream().native_handle());

    auto after_policy2 = thrust::device.after(f1);

    auto f2 = thrust::async::reduce(
      after_policy2, d0.begin(), d0.end()
    );

    // Verify that double consumption of a policy produces an exception.
    ASSERT_THROWS_EQUAL(
      auto x = thrust::async::reduce(
        after_policy2, d0.begin(), d0.end()
      );
      THRUST_UNUSED_VAR(x)
    , thrust::event_error
    , thrust::event_error(thrust::event_errc::no_state)
    );

    ASSERT_EQUAL_QUIET(stream, f2.stream().native_handle());

    // This potentially runs concurrently with the asynchronous reductions.
    T const r0 = thrust::reduce(h0.begin(), h0.end());

    T const r1 = TEST_FUTURE_VALUE_RETRIEVAL(f2);

    ASSERT_EQUAL(r0, r1);

    thrust::cuda_cub::throw_on_error(
      cudaStreamDestroy(stream)
    );
  }
};
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES(
  test_async_reduce_on_then_after
, NumericTypes
);

///////////////////////////////////////////////////////////////////////////////

template <typename T>
struct test_async_reduce_allocator_on_then_after
{
  __host__
  void operator()(std::size_t n)
  {
    thrust::host_vector<T>   h0(unittest::random_integers<T>(n));
    thrust::device_vector<T> d0(h0);

    ASSERT_EQUAL(h0, d0);

    cudaStream_t stream0;
    thrust::cuda_cub::throw_on_error(
      cudaStreamCreateWithFlags(&stream0, cudaStreamNonBlocking)
    );

    cudaStream_t stream1;
    thrust::cuda_cub::throw_on_error(
      cudaStreamCreateWithFlags(&stream1, cudaStreamNonBlocking)
    );

    auto f0 = thrust::async::reduce(
      thrust::device(thrust::device_allocator<void>{}).on(stream0)
    , d0.begin(), d0.end()
    );

    ASSERT_EQUAL_QUIET(stream0, f0.stream().native_handle());

    auto f1 = thrust::async::reduce(
      thrust::device(thrust::device_allocator<void>{}).after(f0)
    , d0.begin(), d0.end()
    );

    ASSERT_THROWS_EQUAL(
      auto x = thrust::async::reduce(
        thrust::device(thrust::device_allocator<void>{}).after(f0)
      , d0.begin(), d0.end()
      );
      THRUST_UNUSED_VAR(x)
    , thrust::event_error
    , thrust::event_error(thrust::event_errc::no_state)
    );

    ASSERT_EQUAL_QUIET(stream0, f1.stream().native_handle());

    auto f2 = thrust::async::reduce(
      thrust::device(thrust::device_allocator<void>{}).on(stream1).after(f1)
    , d0.begin(), d0.end()
    );

    ASSERT_THROWS_EQUAL(
      auto x = thrust::async::reduce(
        thrust::device(thrust::device_allocator<void>{}).on(stream1).after(f1)
      , d0.begin(), d0.end()
      );
      THRUST_UNUSED_VAR(x)
    , thrust::event_error
    , thrust::event_error(thrust::event_errc::no_state)
    );

    KNOWN_FAILURE;
    // FIXME: The below fails because you can't combine allocator attachment,
    // `.on`, and `.after`.
    ASSERT_EQUAL_QUIET(stream1, f2.stream().native_handle());

    // This potentially runs concurrently with the asynchronous reductions.
    T const r0 = thrust::reduce(h0.begin(), h0.end());

    T const r1 = TEST_FUTURE_VALUE_RETRIEVAL(f2);

    ASSERT_EQUAL(r0, r1);

    thrust::cuda_cub::throw_on_error(cudaStreamDestroy(stream0));
    thrust::cuda_cub::throw_on_error(cudaStreamDestroy(stream1));
  }
};
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES(
  test_async_reduce_allocator_on_then_after
, NumericTypes
);

///////////////////////////////////////////////////////////////////////////////

template <typename T>
struct test_async_reduce_caching
{
  __host__
  void operator()(std::size_t n)
  {
    constexpr std::int64_t m = 32;

    thrust::host_vector<T>   h0(unittest::random_integers<T>(n));
    thrust::device_vector<T> d0(h0);

    ASSERT_EQUAL(h0, d0);

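    // `f0.raw_data()` below is only usable because
    // `THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER` is defined at the top of this
    // file; it exposes the address of the future's content so we can check
    // that the caching allocator hands back the same block on each run.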
    T const* f0_raw_data;

    {
      // Perform one reduction to ensure there's an entry in the caching
      // allocator.
      auto f0 = thrust::async::reduce(d0.begin(), d0.end());

      TEST_EVENT_WAIT(f0);

      f0_raw_data = f0.raw_data();
    }

    for (std::int64_t i = 0; i < m; ++i)
    {
      auto f1 = thrust::async::reduce(d0.begin(), d0.end());

      ASSERT_EQUAL(true, f1.valid_stream());
      ASSERT_EQUAL(true, f1.valid_content());

      ASSERT_EQUAL_QUIET(f0_raw_data, f1.raw_data());

      // This potentially runs concurrently with the asynchronous reductions.
      T const r0 = thrust::reduce(h0.begin(), h0.end());

      T const r1 = TEST_FUTURE_VALUE_RETRIEVAL(f1);

      ASSERT_EQUAL(r0, r1);
    }
  }
};
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES(
  test_async_reduce_caching
, NumericTypes
);

///////////////////////////////////////////////////////////////////////////////

template <typename T>
struct test_async_copy_then_reduce
{
  __host__
  void operator()(std::size_t n)
  {
    thrust::host_vector<T>   h0a(unittest::random_integers<T>(n));
    thrust::host_vector<T>   h0b(unittest::random_integers<T>(n));
    thrust::host_vector<T>   h0c(unittest::random_integers<T>(n));
    thrust::host_vector<T>   h0d(unittest::random_integers<T>(n));

    thrust::device_vector<T> d0a(n);
    thrust::device_vector<T> d0b(n);
    thrust::device_vector<T> d0c(n);
    thrust::device_vector<T> d0d(n);

    auto f0a = thrust::async::copy(h0a.begin(), h0a.end(), d0a.begin());
    auto f0b = thrust::async::copy(h0b.begin(), h0b.end(), d0b.begin());
    auto f0c = thrust::async::copy(h0c.begin(), h0c.end(), d0c.begin());
    auto f0d = thrust::async::copy(h0d.begin(), h0d.end(), d0d.begin());

    ASSERT_EQUAL(true, f0a.valid_stream());
    ASSERT_EQUAL(true, f0b.valid_stream());
    ASSERT_EQUAL(true, f0c.valid_stream());
    ASSERT_EQUAL(true, f0d.valid_stream());

    auto const f0a_stream = f0a.stream().native_handle();
    auto const f0b_stream = f0b.stream().native_handle();
    auto const f0c_stream = f0c.stream().native_handle();
    auto const f0d_stream = f0d.stream().native_handle();

    auto f1a = thrust::async::reduce(
      thrust::device.after(f0a), d0a.begin(), d0a.end()
    );
    auto f1b = thrust::async::reduce(
      thrust::device.after(f0b), d0b.begin(), d0b.end()
    );
    auto f1c = thrust::async::reduce(
      thrust::device.after(f0c), d0c.begin(), d0c.end()
    );
    auto f1d = thrust::async::reduce(
      thrust::device.after(f0d), d0d.begin(), d0d.end()
    );

    ASSERT_EQUAL(false, f0a.valid_stream());
    ASSERT_EQUAL(false, f0b.valid_stream());
    ASSERT_EQUAL(false, f0c.valid_stream());
    ASSERT_EQUAL(false, f0d.valid_stream());

    ASSERT_EQUAL(true, f1a.valid_stream());
    ASSERT_EQUAL(true, f1a.valid_content());
    ASSERT_EQUAL(true, f1b.valid_stream());
    ASSERT_EQUAL(true, f1b.valid_content());
    ASSERT_EQUAL(true, f1c.valid_stream());
    ASSERT_EQUAL(true, f1c.valid_content());
    ASSERT_EQUAL(true, f1d.valid_stream());
    ASSERT_EQUAL(true, f1d.valid_content());

    // Verify that streams were stolen.
    ASSERT_EQUAL_QUIET(f0a_stream, f1a.stream().native_handle());
    ASSERT_EQUAL_QUIET(f0b_stream, f1b.stream().native_handle());
    ASSERT_EQUAL_QUIET(f0c_stream, f1c.stream().native_handle());
    ASSERT_EQUAL_QUIET(f0d_stream, f1d.stream().native_handle());

    // This potentially runs concurrently with the copies and reductions.
    T const r0 = thrust::reduce(h0a.begin(), h0a.end());

    T const r1a = TEST_FUTURE_VALUE_RETRIEVAL(f1a);
    T const r1b = TEST_FUTURE_VALUE_RETRIEVAL(f1b);
    T const r1c = TEST_FUTURE_VALUE_RETRIEVAL(f1c);
    T const r1d = TEST_FUTURE_VALUE_RETRIEVAL(f1d);

    ASSERT_EQUAL(r0, r1a);
    ASSERT_EQUAL(r0, r1b);
    ASSERT_EQUAL(r0, r1c);
    ASSERT_EQUAL(r0, r1d);
  }
};
DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES(
  test_async_copy_then_reduce
, BuiltinNumericTypes
);

///////////////////////////////////////////////////////////////////////////////

// TODO: when_all from reductions.

#endif