1 #define THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER 2 3 #include <thrust/detail/config.h> 4 5 #if THRUST_CPP_DIALECT >= 2011 && !defined(THRUST_LEGACY_GCC) 6 7 #include <unittest/unittest.h> 8 #include <unittest/util_async.h> 9 10 #include <thrust/async/reduce.h> 11 #include <thrust/async/copy.h> 12 #include <thrust/host_vector.h> 13 #include <thrust/device_vector.h> 14 15 template <typename T> 16 struct custom_plus 17 { 18 __host__ __device__ operator ()custom_plus19 T operator()(T lhs, T rhs) const 20 { 21 return lhs + rhs; 22 } 23 }; 24 25 #define DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( \ 26 NAME, MEMBERS, CTOR, DTOR, VALIDATE, ... \ 27 ) \ 28 template <typename T> \ 29 struct NAME \ 30 { \ 31 MEMBERS \ 32 \ 33 NAME() { CTOR } \ 34 \ 35 ~NAME() { DTOR } \ 36 \ 37 template <typename Event> \ 38 void validate_event(Event& e) \ 39 { \ 40 THRUST_UNUSED_VAR(e); \ 41 VALIDATE \ 42 } \ 43 \ 44 template < \ 45 typename ForwardIt, typename Sentinel \ 46 > \ 47 __host__ \ 48 auto operator()( \ 49 ForwardIt&& first, Sentinel&& last \ 50 ) \ 51 THRUST_DECLTYPE_RETURNS( \ 52 ::thrust::async::reduce( \ 53 __VA_ARGS__ \ 54 ) \ 55 ) \ 56 }; \ 57 /**/ 58 59 #define DEFINE_ASYNC_REDUCE_INVOKER(NAME, ...) \ 60 DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( \ 61 NAME \ 62 , THRUST_PP_EMPTY(), THRUST_PP_EMPTY(), THRUST_PP_EMPTY(), THRUST_PP_EMPTY()\ 63 , __VA_ARGS__ \ 64 ) \ 65 /**/ 66 67 #define DEFINE_SYNC_REDUCE_INVOKER(NAME, ...) \ 68 template <typename T> \ 69 struct NAME \ 70 { \ 71 \ 72 template < \ 73 typename ForwardIt, typename Sentinel \ 74 > \ 75 __host__ \ 76 auto operator()( \ 77 ForwardIt&& first, Sentinel&& last \ 78 ) \ 79 THRUST_DECLTYPE_RETURNS( \ 80 ::thrust::reduce( \ 81 __VA_ARGS__ \ 82 ) \ 83 ) \ 84 }; \ 85 /**/ 86 87 DEFINE_ASYNC_REDUCE_INVOKER( 88 reduce_async_invoker 89 , THRUST_FWD(first), THRUST_FWD(last) 90 ); 91 DEFINE_ASYNC_REDUCE_INVOKER( 92 reduce_async_invoker_device 93 , thrust::device 94 , THRUST_FWD(first), THRUST_FWD(last) 95 ); 96 DEFINE_ASYNC_REDUCE_INVOKER( 97 reduce_async_invoker_device_allocator 98 , thrust::device(thrust::device_allocator<void>{}) 99 , THRUST_FWD(first), THRUST_FWD(last) 100 ); 101 DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( 102 reduce_async_invoker_device_on 103 // Members. 104 , cudaStream_t stream_; 105 // Constructor. 106 , thrust::cuda_cub::throw_on_error( 107 cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) 108 ); 109 // Destructor. 110 , thrust::cuda_cub::throw_on_error( 111 cudaStreamDestroy(stream_) 112 ); 113 // `validate_event` member. 114 , ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle()); 115 // Arguments to `thrust::async::reduce`. 116 , thrust::device.on(stream_) 117 , THRUST_FWD(first), THRUST_FWD(last) 118 ); 119 DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( 120 reduce_async_invoker_device_allocator_on 121 // Members. 122 , cudaStream_t stream_; 123 // Constructor. 124 , thrust::cuda_cub::throw_on_error( 125 cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) 126 ); 127 // Destructor. 128 , thrust::cuda_cub::throw_on_error( 129 cudaStreamDestroy(stream_) 130 ); 131 // `validate_event` member. 132 , ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle()); 133 // Arguments to `thrust::async::reduce`. 134 , thrust::device(thrust::device_allocator<void>{}).on(stream_) 135 , THRUST_FWD(first), THRUST_FWD(last) 136 ); 137 138 DEFINE_SYNC_REDUCE_INVOKER( 139 reduce_sync_invoker 140 , THRUST_FWD(first), THRUST_FWD(last) 141 ); 142 143 DEFINE_ASYNC_REDUCE_INVOKER( 144 reduce_async_invoker_init 145 , THRUST_FWD(first), THRUST_FWD(last) 146 , unittest::random_integer<T>() 147 ); 148 DEFINE_ASYNC_REDUCE_INVOKER( 149 reduce_async_invoker_device_init 150 , thrust::device 151 , THRUST_FWD(first), THRUST_FWD(last) 152 , unittest::random_integer<T>() 153 ); 154 DEFINE_ASYNC_REDUCE_INVOKER( 155 reduce_async_invoker_device_allocator_init 156 , thrust::device(thrust::device_allocator<void>{}) 157 , THRUST_FWD(first), THRUST_FWD(last) 158 , unittest::random_integer<T>() 159 ); 160 DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( 161 reduce_async_invoker_device_on_init 162 // Members. 163 , cudaStream_t stream_; 164 // Constructor. 165 , thrust::cuda_cub::throw_on_error( 166 cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) 167 ); 168 // Destructor. 169 , thrust::cuda_cub::throw_on_error( 170 cudaStreamDestroy(stream_) 171 ); 172 // `validate_event` member. 173 , ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle()); 174 // Arguments to `thrust::async::reduce`. 175 , thrust::device.on(stream_) 176 , THRUST_FWD(first), THRUST_FWD(last) 177 , unittest::random_integer<T>() 178 ); 179 DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( 180 reduce_async_invoker_device_allocator_on_init 181 // Members. 182 , cudaStream_t stream_; 183 // Constructor. 184 , thrust::cuda_cub::throw_on_error( 185 cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) 186 ); 187 // Destructor. 188 , thrust::cuda_cub::throw_on_error( 189 cudaStreamDestroy(stream_) 190 ); 191 // `validate_event` member. 192 , ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle()); 193 // Arguments to `thrust::async::reduce`. 194 , thrust::device(thrust::device_allocator<void>{}).on(stream_) 195 , THRUST_FWD(first), THRUST_FWD(last) 196 , unittest::random_integer<T>() 197 ); 198 199 DEFINE_SYNC_REDUCE_INVOKER( 200 reduce_sync_invoker_init 201 , THRUST_FWD(first), THRUST_FWD(last) 202 , unittest::random_integer<T>() 203 ); 204 205 DEFINE_ASYNC_REDUCE_INVOKER( 206 reduce_async_invoker_init_plus 207 , THRUST_FWD(first), THRUST_FWD(last) 208 , unittest::random_integer<T>() 209 , thrust::plus<T>() 210 ); 211 DEFINE_ASYNC_REDUCE_INVOKER( 212 reduce_async_invoker_device_init_plus 213 , thrust::device 214 , THRUST_FWD(first), THRUST_FWD(last) 215 , unittest::random_integer<T>() 216 , thrust::plus<T>() 217 ); 218 DEFINE_ASYNC_REDUCE_INVOKER( 219 reduce_async_invoker_device_allocator_init_plus 220 , thrust::device(thrust::device_allocator<void>{}) 221 , THRUST_FWD(first), THRUST_FWD(last) 222 , unittest::random_integer<T>() 223 , thrust::plus<T>() 224 ); 225 DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( 226 reduce_async_invoker_device_on_init_plus 227 // Members. 228 , cudaStream_t stream_; 229 // Constructor. 230 , thrust::cuda_cub::throw_on_error( 231 cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) 232 ); 233 // Destructor. 234 , thrust::cuda_cub::throw_on_error( 235 cudaStreamDestroy(stream_) 236 ); 237 // `validate_event` member. 238 , ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle()); 239 // Arguments to `thrust::async::reduce`. 240 , thrust::device.on(stream_) 241 , THRUST_FWD(first), THRUST_FWD(last) 242 , unittest::random_integer<T>() 243 , thrust::plus<T>() 244 ); 245 DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( 246 reduce_async_invoker_device_allocator_on_init_plus 247 // Members. 248 , cudaStream_t stream_; 249 // Constructor. 250 , thrust::cuda_cub::throw_on_error( 251 cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) 252 ); 253 // Destructor. 254 , thrust::cuda_cub::throw_on_error( 255 cudaStreamDestroy(stream_) 256 ); 257 // `validate_event` member. 258 , ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle()); 259 // Arguments to `thrust::async::reduce`. 260 , thrust::device(thrust::device_allocator<void>{}).on(stream_) 261 , THRUST_FWD(first), THRUST_FWD(last) 262 , unittest::random_integer<T>() 263 , thrust::plus<T>() 264 ); 265 266 DEFINE_SYNC_REDUCE_INVOKER( 267 reduce_sync_invoker_init_plus 268 , THRUST_FWD(first), THRUST_FWD(last) 269 , unittest::random_integer<T>() 270 , thrust::plus<T>() 271 ); 272 273 DEFINE_ASYNC_REDUCE_INVOKER( 274 reduce_async_invoker_init_custom_plus 275 , THRUST_FWD(first), THRUST_FWD(last) 276 , unittest::random_integer<T>() 277 , custom_plus<T>() 278 ); 279 DEFINE_ASYNC_REDUCE_INVOKER( 280 reduce_async_invoker_device_init_custom_plus 281 , thrust::device 282 , THRUST_FWD(first), THRUST_FWD(last) 283 , unittest::random_integer<T>() 284 , custom_plus<T>() 285 ); 286 DEFINE_ASYNC_REDUCE_INVOKER( 287 reduce_async_invoker_device_allocator_init_custom_plus 288 , thrust::device(thrust::device_allocator<void>{}) 289 , THRUST_FWD(first), THRUST_FWD(last) 290 , unittest::random_integer<T>() 291 , custom_plus<T>() 292 ); 293 DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( 294 reduce_async_invoker_device_on_init_custom_plus 295 // Members. 296 , cudaStream_t stream_; 297 // Constructor. 298 , thrust::cuda_cub::throw_on_error( 299 cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) 300 ); 301 // Destructor. 302 , thrust::cuda_cub::throw_on_error( 303 cudaStreamDestroy(stream_) 304 ); 305 // `validate_event` member. 306 , ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle()); 307 // Arguments to `thrust::async::reduce`. 308 , thrust::device.on(stream_) 309 , THRUST_FWD(first), THRUST_FWD(last) 310 , unittest::random_integer<T>() 311 , custom_plus<T>() 312 ); 313 DEFINE_STATEFUL_ASYNC_REDUCE_INVOKER( 314 reduce_async_invoker_device_allocator_on_init_custom_plus 315 // Members. 316 , cudaStream_t stream_; 317 // Constructor. 318 , thrust::cuda_cub::throw_on_error( 319 cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) 320 ); 321 // Destructor. 322 , thrust::cuda_cub::throw_on_error( 323 cudaStreamDestroy(stream_) 324 ); 325 // `validate_event` member. 326 , ASSERT_EQUAL_QUIET(stream_, e.stream().native_handle()); 327 // Arguments to `thrust::async::reduce`. 328 , thrust::device(thrust::device_allocator<void>{}).on(stream_) 329 , THRUST_FWD(first), THRUST_FWD(last) 330 , unittest::random_integer<T>() 331 , custom_plus<T>() 332 ); 333 334 DEFINE_SYNC_REDUCE_INVOKER( 335 reduce_sync_invoker_init_custom_plus 336 , THRUST_FWD(first), THRUST_FWD(last) 337 , unittest::random_integer<T>() 338 , custom_plus<T>() 339 ); 340 341 /////////////////////////////////////////////////////////////////////////////// 342 343 template < 344 template <typename> class AsyncReduceInvoker 345 , template <typename> class SyncReduceInvoker 346 > 347 struct test_async_reduce 348 { 349 template <typename T> 350 struct tester 351 { 352 __host__ operator ()test_async_reduce::tester353 void operator()(std::size_t n) 354 { 355 thrust::host_vector<T> h0(unittest::random_integers<T>(n)); 356 thrust::device_vector<T> d0a(h0); 357 thrust::device_vector<T> d0b(h0); 358 thrust::device_vector<T> d0c(h0); 359 thrust::device_vector<T> d0d(h0); 360 361 AsyncReduceInvoker<T> invoke_async; 362 SyncReduceInvoker<T> invoke_sync; 363 364 ASSERT_EQUAL(h0, d0a); 365 ASSERT_EQUAL(h0, d0b); 366 ASSERT_EQUAL(h0, d0c); 367 ASSERT_EQUAL(h0, d0d); 368 369 auto f0a = invoke_async(d0a.begin(), d0a.end()); 370 auto f0b = invoke_async(d0b.begin(), d0b.end()); 371 auto f0c = invoke_async(d0c.begin(), d0c.end()); 372 auto f0d = invoke_async(d0d.begin(), d0d.end()); 373 374 invoke_async.validate_event(f0a); 375 invoke_async.validate_event(f0b); 376 invoke_async.validate_event(f0c); 377 invoke_async.validate_event(f0d); 378 379 // This potentially runs concurrently with the copies. 380 auto const r0 = invoke_sync(h0.begin(), h0.end()); 381 382 auto const r1a = TEST_FUTURE_VALUE_RETRIEVAL(f0a); 383 auto const r1b = TEST_FUTURE_VALUE_RETRIEVAL(f0b); 384 auto const r1c = TEST_FUTURE_VALUE_RETRIEVAL(f0c); 385 auto const r1d = TEST_FUTURE_VALUE_RETRIEVAL(f0d); 386 387 ASSERT_EQUAL(r0, r1a); 388 ASSERT_EQUAL(r0, r1b); 389 ASSERT_EQUAL(r0, r1c); 390 ASSERT_EQUAL(r0, r1d); 391 } 392 }; 393 }; 394 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 395 THRUST_PP_EXPAND_ARGS( 396 test_async_reduce< 397 reduce_async_invoker 398 , reduce_sync_invoker 399 >::tester 400 ) 401 , NumericTypes 402 , test_async_reduce 403 ); 404 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 405 THRUST_PP_EXPAND_ARGS( 406 test_async_reduce< 407 reduce_async_invoker_device 408 , reduce_sync_invoker 409 >::tester 410 ) 411 , NumericTypes 412 , test_async_reduce_policy 413 ); 414 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 415 THRUST_PP_EXPAND_ARGS( 416 test_async_reduce< 417 reduce_async_invoker_device_allocator 418 , reduce_sync_invoker 419 >::tester 420 ) 421 , NumericTypes 422 , test_async_reduce_policy_allocator 423 ); 424 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 425 THRUST_PP_EXPAND_ARGS( 426 test_async_reduce< 427 reduce_async_invoker_device_on 428 , reduce_sync_invoker 429 >::tester 430 ) 431 , NumericTypes 432 , test_async_reduce_policy_on 433 ); 434 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 435 THRUST_PP_EXPAND_ARGS( 436 test_async_reduce< 437 reduce_async_invoker_device_allocator_on 438 , reduce_sync_invoker 439 >::tester 440 ) 441 , NumericTypes 442 , test_async_reduce_policy_allocator_on 443 ); 444 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 445 THRUST_PP_EXPAND_ARGS( 446 test_async_reduce< 447 reduce_async_invoker_init 448 , reduce_sync_invoker_init 449 >::tester 450 ) 451 , NumericTypes 452 , test_async_reduce_init 453 ); 454 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 455 THRUST_PP_EXPAND_ARGS( 456 test_async_reduce< 457 reduce_async_invoker_device_init 458 , reduce_sync_invoker_init 459 >::tester 460 ) 461 , NumericTypes 462 , test_async_reduce_policy_init 463 ); 464 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 465 THRUST_PP_EXPAND_ARGS( 466 test_async_reduce< 467 reduce_async_invoker_device_allocator_init 468 , reduce_sync_invoker_init 469 >::tester 470 ) 471 , NumericTypes 472 , test_async_reduce_policy_allocator_init 473 ); 474 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 475 THRUST_PP_EXPAND_ARGS( 476 test_async_reduce< 477 reduce_async_invoker_device_on_init 478 , reduce_sync_invoker_init 479 >::tester 480 ) 481 , NumericTypes 482 , test_async_reduce_policy_on_init 483 ); 484 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 485 THRUST_PP_EXPAND_ARGS( 486 test_async_reduce< 487 reduce_async_invoker_device_allocator_on_init 488 , reduce_sync_invoker_init 489 >::tester 490 ) 491 , NumericTypes 492 , test_async_reduce_policy_allocator_on_init 493 ); 494 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 495 THRUST_PP_EXPAND_ARGS( 496 test_async_reduce< 497 reduce_async_invoker_init_plus 498 , reduce_sync_invoker_init_plus 499 >::tester 500 ) 501 , NumericTypes 502 , test_async_reduce_init_plus 503 ); 504 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 505 THRUST_PP_EXPAND_ARGS( 506 test_async_reduce< 507 reduce_async_invoker_device_init_plus 508 , reduce_sync_invoker_init_plus 509 >::tester 510 ) 511 , NumericTypes 512 , test_async_reduce_policy_init_plus 513 ); 514 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 515 THRUST_PP_EXPAND_ARGS( 516 test_async_reduce< 517 reduce_async_invoker_device_allocator_init_plus 518 , reduce_sync_invoker_init_plus 519 >::tester 520 ) 521 , NumericTypes 522 , test_async_reduce_policy_allocator_init_plus 523 ); 524 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 525 THRUST_PP_EXPAND_ARGS( 526 test_async_reduce< 527 reduce_async_invoker_device_on_init_plus 528 , reduce_sync_invoker_init_plus 529 >::tester 530 ) 531 , NumericTypes 532 , test_async_reduce_policy_on_init_plus 533 ); 534 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 535 THRUST_PP_EXPAND_ARGS( 536 test_async_reduce< 537 reduce_async_invoker_device_allocator_on_init_plus 538 , reduce_sync_invoker_init_plus 539 >::tester 540 ) 541 , NumericTypes 542 , test_async_reduce_policy_allocator_on_init_plus 543 ); 544 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 545 THRUST_PP_EXPAND_ARGS( 546 test_async_reduce< 547 reduce_async_invoker_init_custom_plus 548 , reduce_sync_invoker_init_custom_plus 549 >::tester 550 ) 551 , NumericTypes 552 , test_async_reduce_init_custom_plus 553 ); 554 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 555 THRUST_PP_EXPAND_ARGS( 556 test_async_reduce< 557 reduce_async_invoker_device_init_custom_plus 558 , reduce_sync_invoker_init_custom_plus 559 >::tester 560 ) 561 , NumericTypes 562 , test_async_reduce_policy_init_custom_plus 563 ); 564 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 565 THRUST_PP_EXPAND_ARGS( 566 test_async_reduce< 567 reduce_async_invoker_device_allocator_init_custom_plus 568 , reduce_sync_invoker_init_custom_plus 569 >::tester 570 ) 571 , NumericTypes 572 , test_async_reduce_policy_allocator_init_custom_plus 573 ); 574 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 575 THRUST_PP_EXPAND_ARGS( 576 test_async_reduce< 577 reduce_async_invoker_device_on_init_custom_plus 578 , reduce_sync_invoker_init_custom_plus 579 >::tester 580 ) 581 , NumericTypes 582 , test_async_reduce_policy_on_init_custom_plus 583 ); 584 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( 585 THRUST_PP_EXPAND_ARGS( 586 test_async_reduce< 587 reduce_async_invoker_device_allocator_on_init_custom_plus 588 , reduce_sync_invoker_init_custom_plus 589 >::tester 590 ) 591 , NumericTypes 592 , test_async_reduce_policy_allocator_on_init_custom_plus 593 ); 594 595 /////////////////////////////////////////////////////////////////////////////// 596 597 template < 598 template <typename> class AsyncReduceInvoker 599 , template <typename> class SyncReduceInvoker 600 > 601 struct test_async_reduce_counting_iterator 602 { 603 template <typename T> 604 struct tester 605 { 606 __host__ operator ()test_async_reduce_counting_iterator::tester607 void operator()() 608 { 609 constexpr std::size_t n = 15 * sizeof(T); 610 611 ASSERT_LEQUAL(T(n), unittest::truncate_to_max_representable<T>(n)); 612 613 thrust::counting_iterator<T> first(0); 614 thrust::counting_iterator<T> last(n); 615 616 AsyncReduceInvoker<T> invoke_async; 617 SyncReduceInvoker<T> invoke_sync; 618 619 auto f0a = invoke_async(first, last); 620 auto f0b = invoke_async(first, last); 621 auto f0c = invoke_async(first, last); 622 auto f0d = invoke_async(first, last); 623 624 invoke_async.validate_event(f0a); 625 invoke_async.validate_event(f0b); 626 invoke_async.validate_event(f0c); 627 invoke_async.validate_event(f0d); 628 629 // This potentially runs concurrently with the copies. 630 auto const r0 = invoke_sync(first, last); 631 632 auto const r1a = TEST_FUTURE_VALUE_RETRIEVAL(f0a); 633 auto const r1b = TEST_FUTURE_VALUE_RETRIEVAL(f0b); 634 auto const r1c = TEST_FUTURE_VALUE_RETRIEVAL(f0c); 635 auto const r1d = TEST_FUTURE_VALUE_RETRIEVAL(f0d); 636 637 ASSERT_EQUAL(r0, r1a); 638 ASSERT_EQUAL(r0, r1b); 639 ASSERT_EQUAL(r0, r1c); 640 ASSERT_EQUAL(r0, r1d); 641 } 642 }; 643 }; 644 DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME( 645 THRUST_PP_EXPAND_ARGS( 646 test_async_reduce_counting_iterator< 647 reduce_async_invoker 648 , reduce_sync_invoker 649 >::tester 650 ) 651 , BuiltinNumericTypes 652 , test_async_reduce_counting_iterator 653 ); 654 DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME( 655 THRUST_PP_EXPAND_ARGS( 656 test_async_reduce_counting_iterator< 657 reduce_async_invoker_device 658 , reduce_sync_invoker 659 >::tester 660 ) 661 , BuiltinNumericTypes 662 , test_async_reduce_policy_counting_iterator 663 ); 664 DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME( 665 THRUST_PP_EXPAND_ARGS( 666 test_async_reduce_counting_iterator< 667 reduce_async_invoker_init 668 , reduce_sync_invoker_init 669 >::tester 670 ) 671 , BuiltinNumericTypes 672 , test_async_reduce_counting_iterator_init 673 ); 674 DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME( 675 THRUST_PP_EXPAND_ARGS( 676 test_async_reduce_counting_iterator< 677 reduce_async_invoker_device_init 678 , reduce_sync_invoker_init 679 >::tester 680 ) 681 , BuiltinNumericTypes 682 , test_async_reduce_policy_counting_iterator_init 683 ); 684 DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME( 685 THRUST_PP_EXPAND_ARGS( 686 test_async_reduce_counting_iterator< 687 reduce_async_invoker_init_plus 688 , reduce_sync_invoker_init_plus 689 >::tester 690 ) 691 , BuiltinNumericTypes 692 , test_async_reduce_counting_iterator_init_plus 693 ); 694 DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME( 695 THRUST_PP_EXPAND_ARGS( 696 test_async_reduce_counting_iterator< 697 reduce_async_invoker_device_init_plus 698 , reduce_sync_invoker_init_plus 699 >::tester 700 ) 701 , BuiltinNumericTypes 702 , test_async_reduce_policy_counting_iterator_init_plus 703 ); 704 DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME( 705 THRUST_PP_EXPAND_ARGS( 706 test_async_reduce_counting_iterator< 707 reduce_async_invoker_init_custom_plus 708 , reduce_sync_invoker_init_custom_plus 709 >::tester 710 ) 711 , BuiltinNumericTypes 712 , test_async_reduce_counting_iterator_init_custom_plus 713 ); 714 DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME( 715 THRUST_PP_EXPAND_ARGS( 716 test_async_reduce_counting_iterator< 717 reduce_async_invoker_device_init_custom_plus 718 , reduce_sync_invoker_init_custom_plus 719 >::tester 720 ) 721 , BuiltinNumericTypes 722 , test_async_reduce_policy_counting_iterator_init_custom_plus 723 ); 724 725 /////////////////////////////////////////////////////////////////////////////// 726 727 template <typename T> 728 struct test_async_reduce_using 729 { 730 __host__ operator ()test_async_reduce_using731 void operator()(std::size_t n) 732 { 733 thrust::host_vector<T> h0(unittest::random_integers<T>(n)); 734 thrust::device_vector<T> d0a(h0); 735 thrust::device_vector<T> d0b(h0); 736 737 ASSERT_EQUAL(h0, d0a); 738 ASSERT_EQUAL(h0, d0b); 739 740 thrust::device_future<T> f0a; 741 thrust::device_future<T> f0b; 742 743 // When you import the customization points into the global namespace, 744 // they should be selected instead of the synchronous algorithms. 745 { 746 using namespace thrust::async; 747 f0a = reduce(d0a.begin(), d0a.end()); 748 } 749 { 750 using thrust::async::reduce; 751 f0b = reduce(d0b.begin(), d0b.end()); 752 } 753 754 // ADL should find the synchronous algorithms. 755 // This potentially runs concurrently with the copies. 756 T const r0 = reduce(h0.begin(), h0.end()); 757 758 T const r1a = TEST_FUTURE_VALUE_RETRIEVAL(f0a); 759 T const r1b = TEST_FUTURE_VALUE_RETRIEVAL(f0b); 760 761 ASSERT_EQUAL(r0, r1a); 762 ASSERT_EQUAL(r0, r1b); 763 } 764 }; 765 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( 766 test_async_reduce_using 767 , NumericTypes 768 ); 769 770 /////////////////////////////////////////////////////////////////////////////// 771 772 template <typename T> 773 struct test_async_reduce_after 774 { 775 __host__ operator ()test_async_reduce_after776 void operator()(std::size_t n) 777 { 778 thrust::host_vector<T> h0(unittest::random_integers<T>(n)); 779 thrust::device_vector<T> d0(h0); 780 781 ASSERT_EQUAL(h0, d0); 782 783 auto f0 = thrust::async::reduce( 784 d0.begin(), d0.end() 785 ); 786 787 ASSERT_EQUAL(true, f0.valid_stream()); 788 789 auto const f0_stream = f0.stream().native_handle(); 790 791 auto f1 = thrust::async::reduce( 792 thrust::device.after(f0), d0.begin(), d0.end() 793 ); 794 795 // Verify that double consumption of a future produces an exception. 796 ASSERT_THROWS_EQUAL( 797 auto x = thrust::async::reduce( 798 thrust::device.after(f0), d0.begin(), d0.end() 799 ); 800 THRUST_UNUSED_VAR(x) 801 , thrust::event_error 802 , thrust::event_error(thrust::event_errc::no_state) 803 ); 804 805 ASSERT_EQUAL_QUIET(f0_stream, f1.stream().native_handle()); 806 807 auto after_policy2 = thrust::device.after(f1); 808 809 auto f2 = thrust::async::reduce( 810 after_policy2, d0.begin(), d0.end() 811 ); 812 813 // Verify that double consumption of a policy produces an exception. 814 ASSERT_THROWS_EQUAL( 815 auto x = thrust::async::reduce( 816 after_policy2, d0.begin(), d0.end() 817 ); 818 THRUST_UNUSED_VAR(x) 819 , thrust::event_error 820 , thrust::event_error(thrust::event_errc::no_state) 821 ); 822 823 ASSERT_EQUAL_QUIET(f0_stream, f2.stream().native_handle()); 824 825 // This potentially runs concurrently with the copies. 826 T const r0 = thrust::reduce(h0.begin(), h0.end()); 827 828 T const r1 = TEST_FUTURE_VALUE_RETRIEVAL(f2); 829 830 ASSERT_EQUAL(r0, r1); 831 } 832 }; 833 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( 834 test_async_reduce_after 835 , NumericTypes 836 ); 837 838 /////////////////////////////////////////////////////////////////////////////// 839 840 template <typename T> 841 struct test_async_reduce_on_then_after 842 { 843 __host__ operator ()test_async_reduce_on_then_after844 void operator()(std::size_t n) 845 { 846 thrust::host_vector<T> h0(unittest::random_integers<T>(n)); 847 thrust::device_vector<T> d0(h0); 848 849 ASSERT_EQUAL(h0, d0); 850 851 cudaStream_t stream; 852 thrust::cuda_cub::throw_on_error( 853 cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking) 854 ); 855 856 auto f0 = thrust::async::reduce( 857 thrust::device.on(stream), d0.begin(), d0.end() 858 ); 859 860 ASSERT_EQUAL_QUIET(stream, f0.stream().native_handle()); 861 862 auto f1 = thrust::async::reduce( 863 thrust::device.after(f0), d0.begin(), d0.end() 864 ); 865 866 // Verify that double consumption of a future produces an exception. 867 ASSERT_THROWS_EQUAL( 868 auto x = thrust::async::reduce( 869 thrust::device.after(f0), d0.begin(), d0.end() 870 ); 871 THRUST_UNUSED_VAR(x) 872 , thrust::event_error 873 , thrust::event_error(thrust::event_errc::no_state) 874 ); 875 876 ASSERT_EQUAL_QUIET(stream, f1.stream().native_handle()); 877 878 auto after_policy2 = thrust::device.after(f1); 879 880 auto f2 = thrust::async::reduce( 881 after_policy2, d0.begin(), d0.end() 882 ); 883 884 // Verify that double consumption of a policy produces an exception. 885 ASSERT_THROWS_EQUAL( 886 auto x = thrust::async::reduce( 887 after_policy2, d0.begin(), d0.end() 888 ); 889 THRUST_UNUSED_VAR(x) 890 , thrust::event_error 891 , thrust::event_error(thrust::event_errc::no_state) 892 ); 893 894 ASSERT_EQUAL_QUIET(stream, f2.stream().native_handle()); 895 896 // This potentially runs concurrently with the copies. 897 T const r0 = thrust::reduce(h0.begin(), h0.end()); 898 899 T const r1 = TEST_FUTURE_VALUE_RETRIEVAL(f2); 900 901 ASSERT_EQUAL(r0, r1); 902 903 thrust::cuda_cub::throw_on_error( 904 cudaStreamDestroy(stream) 905 ); 906 } 907 }; 908 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( 909 test_async_reduce_on_then_after 910 , NumericTypes 911 ); 912 913 /////////////////////////////////////////////////////////////////////////////// 914 915 template <typename T> 916 struct test_async_reduce_allocator_on_then_after 917 { 918 __host__ operator ()test_async_reduce_allocator_on_then_after919 void operator()(std::size_t n) 920 { 921 thrust::host_vector<T> h0(unittest::random_integers<T>(n)); 922 thrust::device_vector<T> d0(h0); 923 924 ASSERT_EQUAL(h0, d0); 925 926 cudaStream_t stream0; 927 thrust::cuda_cub::throw_on_error( 928 cudaStreamCreateWithFlags(&stream0, cudaStreamNonBlocking) 929 ); 930 931 cudaStream_t stream1; 932 thrust::cuda_cub::throw_on_error( 933 cudaStreamCreateWithFlags(&stream1, cudaStreamNonBlocking) 934 ); 935 936 auto f0 = thrust::async::reduce( 937 thrust::device(thrust::device_allocator<void>{}).on(stream0) 938 , d0.begin(), d0.end() 939 ); 940 941 ASSERT_EQUAL_QUIET(stream0, f0.stream().native_handle()); 942 943 auto f1 = thrust::async::reduce( 944 thrust::device(thrust::device_allocator<void>{}).after(f0) 945 , d0.begin(), d0.end() 946 ); 947 948 ASSERT_THROWS_EQUAL( 949 auto x = thrust::async::reduce( 950 thrust::device(thrust::device_allocator<void>{}).after(f0) 951 , d0.begin(), d0.end() 952 ); 953 THRUST_UNUSED_VAR(x) 954 , thrust::event_error 955 , thrust::event_error(thrust::event_errc::no_state) 956 ); 957 958 ASSERT_EQUAL_QUIET(stream0, f1.stream().native_handle()); 959 960 auto f2 = thrust::async::reduce( 961 thrust::device(thrust::device_allocator<void>{}).on(stream1).after(f1) 962 , d0.begin(), d0.end() 963 ); 964 965 ASSERT_THROWS_EQUAL( 966 auto x = thrust::async::reduce( 967 thrust::device(thrust::device_allocator<void>{}).on(stream1).after(f1) 968 , d0.begin(), d0.end() 969 ); 970 THRUST_UNUSED_VAR(x) 971 , thrust::event_error 972 , thrust::event_error(thrust::event_errc::no_state) 973 ); 974 975 KNOWN_FAILURE; 976 // FIXME: The below fails because you can't combine allocator attachment, 977 // `.on`, and `.after`. 978 ASSERT_EQUAL_QUIET(stream1, f2.stream().native_handle()); 979 980 // This potentially runs concurrently with the copies. 981 T const r0 = thrust::reduce(h0.begin(), h0.end()); 982 983 T const r1 = TEST_FUTURE_VALUE_RETRIEVAL(f2); 984 985 ASSERT_EQUAL(r0, r1); 986 987 thrust::cuda_cub::throw_on_error(cudaStreamDestroy(stream0)); 988 thrust::cuda_cub::throw_on_error(cudaStreamDestroy(stream1)); 989 } 990 }; 991 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( 992 test_async_reduce_allocator_on_then_after 993 , NumericTypes 994 ); 995 996 /////////////////////////////////////////////////////////////////////////////// 997 998 template <typename T> 999 struct test_async_reduce_caching 1000 { 1001 __host__ operator ()test_async_reduce_caching1002 void operator()(std::size_t n) 1003 { 1004 constexpr std::int64_t m = 32; 1005 1006 thrust::host_vector<T> h0(unittest::random_integers<T>(n)); 1007 thrust::device_vector<T> d0(h0); 1008 1009 ASSERT_EQUAL(h0, d0); 1010 1011 T const* f0_raw_data; 1012 1013 { 1014 // Perform one reduction to ensure there's an entry in the caching 1015 // allocator. 1016 auto f0 = thrust::async::reduce(d0.begin(), d0.end()); 1017 1018 TEST_EVENT_WAIT(f0); 1019 1020 f0_raw_data = f0.raw_data(); 1021 } 1022 1023 for (std::int64_t i = 0; i < m; ++i) 1024 { 1025 auto f1 = thrust::async::reduce(d0.begin(), d0.end()); 1026 1027 ASSERT_EQUAL(true, f1.valid_stream()); 1028 ASSERT_EQUAL(true, f1.valid_content()); 1029 1030 ASSERT_EQUAL_QUIET(f0_raw_data, f1.raw_data()); 1031 1032 // This potentially runs concurrently with the copies. 1033 T const r0 = thrust::reduce(h0.begin(), h0.end()); 1034 1035 T const r1 = TEST_FUTURE_VALUE_RETRIEVAL(f1); 1036 1037 ASSERT_EQUAL(r0, r1); 1038 } 1039 } 1040 }; 1041 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( 1042 test_async_reduce_caching 1043 , NumericTypes 1044 ); 1045 1046 /////////////////////////////////////////////////////////////////////////////// 1047 1048 template <typename T> 1049 struct test_async_copy_then_reduce 1050 { 1051 __host__ operator ()test_async_copy_then_reduce1052 void operator()(std::size_t n) 1053 { 1054 thrust::host_vector<T> h0a(unittest::random_integers<T>(n)); 1055 thrust::host_vector<T> h0b(unittest::random_integers<T>(n)); 1056 thrust::host_vector<T> h0c(unittest::random_integers<T>(n)); 1057 thrust::host_vector<T> h0d(unittest::random_integers<T>(n)); 1058 1059 thrust::device_vector<T> d0a(n); 1060 thrust::device_vector<T> d0b(n); 1061 thrust::device_vector<T> d0c(n); 1062 thrust::device_vector<T> d0d(n); 1063 1064 auto f0a = thrust::async::copy(h0a.begin(), h0a.end(), d0a.begin()); 1065 auto f0b = thrust::async::copy(h0b.begin(), h0b.end(), d0b.begin()); 1066 auto f0c = thrust::async::copy(h0c.begin(), h0c.end(), d0c.begin()); 1067 auto f0d = thrust::async::copy(h0d.begin(), h0d.end(), d0d.begin()); 1068 1069 ASSERT_EQUAL(true, f0a.valid_stream()); 1070 ASSERT_EQUAL(true, f0b.valid_stream()); 1071 ASSERT_EQUAL(true, f0c.valid_stream()); 1072 ASSERT_EQUAL(true, f0d.valid_stream()); 1073 1074 auto const f0a_stream = f0a.stream().native_handle(); 1075 auto const f0b_stream = f0b.stream().native_handle(); 1076 auto const f0c_stream = f0c.stream().native_handle(); 1077 auto const f0d_stream = f0d.stream().native_handle(); 1078 1079 auto f1a = thrust::async::reduce( 1080 thrust::device.after(f0a), d0a.begin(), d0a.end() 1081 ); 1082 auto f1b = thrust::async::reduce( 1083 thrust::device.after(f0b), d0b.begin(), d0b.end() 1084 ); 1085 auto f1c = thrust::async::reduce( 1086 thrust::device.after(f0c), d0c.begin(), d0c.end() 1087 ); 1088 auto f1d = thrust::async::reduce( 1089 thrust::device.after(f0d), d0d.begin(), d0d.end() 1090 ); 1091 1092 ASSERT_EQUAL(false, f0a.valid_stream()); 1093 ASSERT_EQUAL(false, f0b.valid_stream()); 1094 ASSERT_EQUAL(false, f0c.valid_stream()); 1095 ASSERT_EQUAL(false, f0d.valid_stream()); 1096 1097 ASSERT_EQUAL(true, f1a.valid_stream()); 1098 ASSERT_EQUAL(true, f1a.valid_content()); 1099 ASSERT_EQUAL(true, f1b.valid_stream()); 1100 ASSERT_EQUAL(true, f1b.valid_content()); 1101 ASSERT_EQUAL(true, f1c.valid_stream()); 1102 ASSERT_EQUAL(true, f1c.valid_content()); 1103 ASSERT_EQUAL(true, f1d.valid_stream()); 1104 ASSERT_EQUAL(true, f1d.valid_content()); 1105 1106 // Verify that streams were stolen. 1107 ASSERT_EQUAL_QUIET(f0a_stream, f1a.stream().native_handle()); 1108 ASSERT_EQUAL_QUIET(f0b_stream, f1b.stream().native_handle()); 1109 ASSERT_EQUAL_QUIET(f0c_stream, f1c.stream().native_handle()); 1110 ASSERT_EQUAL_QUIET(f0d_stream, f1d.stream().native_handle()); 1111 1112 // This potentially runs concurrently with the copies. 1113 T const r0 = thrust::reduce(h0a.begin(), h0a.end()); 1114 1115 T const r1a = TEST_FUTURE_VALUE_RETRIEVAL(f1a); 1116 T const r1b = TEST_FUTURE_VALUE_RETRIEVAL(f1b); 1117 T const r1c = TEST_FUTURE_VALUE_RETRIEVAL(f1c); 1118 T const r1d = TEST_FUTURE_VALUE_RETRIEVAL(f1d); 1119 1120 ASSERT_EQUAL(r0, r1a); 1121 ASSERT_EQUAL(r0, r1b); 1122 ASSERT_EQUAL(r0, r1c); 1123 ASSERT_EQUAL(r0, r1d); 1124 } 1125 }; 1126 DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( 1127 test_async_copy_then_reduce 1128 , BuiltinNumericTypes 1129 ); 1130 1131 /////////////////////////////////////////////////////////////////////////////// 1132 1133 // TODO: when_all from reductions. 1134 1135 #endif 1136 1137