1 /****************************************************************************** 2 * Copyright (c) 2011, Duane Merrill. All rights reserved. 3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * * Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * * Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * * Neither the name of the NVIDIA CORPORATION nor the 13 * names of its contributors may be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 ******************************************************************************/ 28 29 /** 30 * \file 31 * Common type manipulation (metaprogramming) utilities 32 */ 33 34 #pragma once 35 36 #include <iostream> 37 #include <limits> 38 #include <cfloat> 39 40 #if (__CUDACC_VER_MAJOR__ >= 9) 41 #include <cuda_fp16.h> 42 #endif 43 44 #include "util_macro.cuh" 45 #include "util_arch.cuh" 46 #include "util_namespace.cuh" 47 48 49 50 /// Optional outer namespace(s) 51 CUB_NS_PREFIX 52 53 /// CUB namespace 54 namespace cub { 55 56 57 /** 58 * \addtogroup UtilModule 59 * @{ 60 */ 61 62 63 64 /****************************************************************************** 65 * Type equality 66 ******************************************************************************/ 67 68 /** 69 * \brief Type selection (<tt>IF ? ThenType : ElseType</tt>) 70 */ 71 template <bool IF, typename ThenType, typename ElseType> 72 struct If 73 { 74 /// Conditional type result 75 typedef ThenType Type; // true 76 }; 77 78 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 79 80 template <typename ThenType, typename ElseType> 81 struct If<false, ThenType, ElseType> 82 { 83 typedef ElseType Type; // false 84 }; 85 86 #endif // DOXYGEN_SHOULD_SKIP_THIS 87 88 89 90 /****************************************************************************** 91 * Conditional types 92 ******************************************************************************/ 93 94 /** 95 * \brief Type equality test 96 */ 97 template <typename A, typename B> 98 struct Equals 99 { 100 enum { 101 VALUE = 0, 102 NEGATE = 1 103 }; 104 }; 105 106 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 107 108 template <typename A> 109 struct Equals <A, A> 110 { 111 enum { 112 VALUE = 1, 113 NEGATE = 0 114 }; 115 }; 116 117 #endif // DOXYGEN_SHOULD_SKIP_THIS 118 119 120 /****************************************************************************** 121 * Static math 122 ******************************************************************************/ 123 124 /** 125 * \brief Statically determine log2(N), rounded up. 126 * 127 * For example: 128 * Log2<8>::VALUE // 3 129 * Log2<3>::VALUE // 2 130 */ 131 template <int N, int CURRENT_VAL = N, int COUNT = 0> 132 struct Log2 133 { 134 /// Static logarithm value 135 enum { VALUE = Log2<N, (CURRENT_VAL >> 1), COUNT + 1>::VALUE }; // Inductive case 136 }; 137 138 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 139 140 template <int N, int COUNT> 141 struct Log2<N, 0, COUNT> 142 { 143 enum {VALUE = (1 << (COUNT - 1) < N) ? // Base case 144 COUNT : 145 COUNT - 1 }; 146 }; 147 148 #endif // DOXYGEN_SHOULD_SKIP_THIS 149 150 151 /** 152 * \brief Statically determine if N is a power-of-two 153 */ 154 template <int N> 155 struct PowerOfTwo 156 { 157 enum { VALUE = ((N & (N - 1)) == 0) }; 158 }; 159 160 161 162 /****************************************************************************** 163 * Pointer vs. iterator detection 164 ******************************************************************************/ 165 166 /** 167 * \brief Pointer vs. iterator 168 */ 169 template <typename Tp> 170 struct IsPointer 171 { 172 enum { VALUE = 0 }; 173 }; 174 175 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 176 177 template <typename Tp> 178 struct IsPointer<Tp*> 179 { 180 enum { VALUE = 1 }; 181 }; 182 183 #endif // DOXYGEN_SHOULD_SKIP_THIS 184 185 186 187 /****************************************************************************** 188 * Qualifier detection 189 ******************************************************************************/ 190 191 /** 192 * \brief Volatile modifier test 193 */ 194 template <typename Tp> 195 struct IsVolatile 196 { 197 enum { VALUE = 0 }; 198 }; 199 200 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 201 202 template <typename Tp> 203 struct IsVolatile<Tp volatile> 204 { 205 enum { VALUE = 1 }; 206 }; 207 208 #endif // DOXYGEN_SHOULD_SKIP_THIS 209 210 211 /****************************************************************************** 212 * Qualifier removal 213 ******************************************************************************/ 214 215 /** 216 * \brief Removes \p const and \p volatile qualifiers from type \p Tp. 217 * 218 * For example: 219 * <tt>typename RemoveQualifiers<volatile int>::Type // int;</tt> 220 */ 221 template <typename Tp, typename Up = Tp> 222 struct RemoveQualifiers 223 { 224 /// Type without \p const and \p volatile qualifiers 225 typedef Up Type; 226 }; 227 228 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 229 230 template <typename Tp, typename Up> 231 struct RemoveQualifiers<Tp, volatile Up> 232 { 233 typedef Up Type; 234 }; 235 236 template <typename Tp, typename Up> 237 struct RemoveQualifiers<Tp, const Up> 238 { 239 typedef Up Type; 240 }; 241 242 template <typename Tp, typename Up> 243 struct RemoveQualifiers<Tp, const volatile Up> 244 { 245 typedef Up Type; 246 }; 247 248 249 /****************************************************************************** 250 * Marker types 251 ******************************************************************************/ 252 253 /** 254 * \brief A simple "NULL" marker type 255 */ 256 struct NullType 257 { 258 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 259 260 template <typename T> operator =cub::NullType261 __host__ __device__ __forceinline__ NullType& operator =(const T&) { return *this; } 262 operator ==cub::NullType263 __host__ __device__ __forceinline__ bool operator ==(const NullType&) { return true; } 264 operator !=cub::NullType265 __host__ __device__ __forceinline__ bool operator !=(const NullType&) { return false; } 266 267 #endif // DOXYGEN_SHOULD_SKIP_THIS 268 }; 269 270 271 /** 272 * \brief Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values) 273 */ 274 template <int A> 275 struct Int2Type 276 { 277 enum {VALUE = A}; 278 }; 279 280 281 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 282 283 284 /****************************************************************************** 285 * Size and alignment 286 ******************************************************************************/ 287 288 /// Structure alignment 289 template <typename T> 290 struct AlignBytes 291 { 292 struct Pad 293 { 294 T val; 295 char byte; 296 }; 297 298 enum 299 { 300 /// The "true CUDA" alignment of T in bytes 301 ALIGN_BYTES = sizeof(Pad) - sizeof(T) 302 }; 303 304 /// The "truly aligned" type 305 typedef T Type; 306 }; 307 308 // Specializations where host C++ compilers (e.g., 32-bit Windows) may disagree 309 // with device C++ compilers (EDG) on types passed as template parameters through 310 // kernel functions 311 312 #define __CUB_ALIGN_BYTES(t, b) \ 313 template <> struct AlignBytes<t> \ 314 { enum { ALIGN_BYTES = b }; typedef __align__(b) t Type; }; 315 316 __CUB_ALIGN_BYTES(short4, 8) 317 __CUB_ALIGN_BYTES(ushort4, 8) 318 __CUB_ALIGN_BYTES(int2, 8) 319 __CUB_ALIGN_BYTES(uint2, 8) 320 __CUB_ALIGN_BYTES(long long, 8) 321 __CUB_ALIGN_BYTES(unsigned long long, 8) 322 __CUB_ALIGN_BYTES(float2, 8) 323 __CUB_ALIGN_BYTES(double, 8) 324 #ifdef _WIN32 325 __CUB_ALIGN_BYTES(long2, 8) 326 __CUB_ALIGN_BYTES(ulong2, 8) 327 #else 328 __CUB_ALIGN_BYTES(long2, 16) 329 __CUB_ALIGN_BYTES(ulong2, 16) 330 #endif 331 __CUB_ALIGN_BYTES(int4, 16) 332 __CUB_ALIGN_BYTES(uint4, 16) 333 __CUB_ALIGN_BYTES(float4, 16) 334 __CUB_ALIGN_BYTES(long4, 16) 335 __CUB_ALIGN_BYTES(ulong4, 16) 336 __CUB_ALIGN_BYTES(longlong2, 16) 337 __CUB_ALIGN_BYTES(ulonglong2, 16) 338 __CUB_ALIGN_BYTES(double2, 16) 339 __CUB_ALIGN_BYTES(longlong4, 16) 340 __CUB_ALIGN_BYTES(ulonglong4, 16) 341 __CUB_ALIGN_BYTES(double4, 16) 342 343 template <typename T> struct AlignBytes<volatile T> : AlignBytes<T> {}; 344 template <typename T> struct AlignBytes<const T> : AlignBytes<T> {}; 345 template <typename T> struct AlignBytes<const volatile T> : AlignBytes<T> {}; 346 347 348 /// Unit-words of data movement 349 template <typename T> 350 struct UnitWord 351 { 352 enum { 353 ALIGN_BYTES = AlignBytes<T>::ALIGN_BYTES 354 }; 355 356 template <typename Unit> 357 struct IsMultiple 358 { 359 enum { 360 UNIT_ALIGN_BYTES = AlignBytes<Unit>::ALIGN_BYTES, 361 IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (ALIGN_BYTES % UNIT_ALIGN_BYTES == 0) 362 }; 363 }; 364 365 /// Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T 366 typedef typename If<IsMultiple<int>::IS_MULTIPLE, 367 unsigned int, 368 typename If<IsMultiple<short>::IS_MULTIPLE, 369 unsigned short, 370 unsigned char>::Type>::Type ShuffleWord; 371 372 /// Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T 373 typedef typename If<IsMultiple<long long>::IS_MULTIPLE, 374 unsigned long long, 375 ShuffleWord>::Type VolatileWord; 376 377 /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T 378 typedef typename If<IsMultiple<longlong2>::IS_MULTIPLE, 379 ulonglong2, 380 VolatileWord>::Type DeviceWord; 381 382 /// Biggest texture reference word that T is a whole multiple of and is not larger than the alignment of T 383 typedef typename If<IsMultiple<int4>::IS_MULTIPLE, 384 uint4, 385 typename If<IsMultiple<int2>::IS_MULTIPLE, 386 uint2, 387 ShuffleWord>::Type>::Type TextureWord; 388 }; 389 390 391 // float2 specialization workaround (for SM10-SM13) 392 template <> 393 struct UnitWord <float2> 394 { 395 typedef int ShuffleWord; 396 #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) 397 typedef float VolatileWord; 398 typedef uint2 DeviceWord; 399 #else 400 typedef unsigned long long VolatileWord; 401 typedef unsigned long long DeviceWord; 402 #endif 403 typedef float2 TextureWord; 404 }; 405 406 // float4 specialization workaround (for SM10-SM13) 407 template <> 408 struct UnitWord <float4> 409 { 410 typedef int ShuffleWord; 411 #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) 412 typedef float VolatileWord; 413 typedef uint4 DeviceWord; 414 #else 415 typedef unsigned long long VolatileWord; 416 typedef ulonglong2 DeviceWord; 417 #endif 418 typedef float4 TextureWord; 419 }; 420 421 422 // char2 specialization workaround (for SM10-SM13) 423 template <> 424 struct UnitWord <char2> 425 { 426 typedef unsigned short ShuffleWord; 427 #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) 428 typedef unsigned short VolatileWord; 429 typedef short DeviceWord; 430 #else 431 typedef unsigned short VolatileWord; 432 typedef unsigned short DeviceWord; 433 #endif 434 typedef unsigned short TextureWord; 435 }; 436 437 438 template <typename T> struct UnitWord<volatile T> : UnitWord<T> {}; 439 template <typename T> struct UnitWord<const T> : UnitWord<T> {}; 440 template <typename T> struct UnitWord<const volatile T> : UnitWord<T> {}; 441 442 443 #endif // DOXYGEN_SHOULD_SKIP_THIS 444 445 446 447 /****************************************************************************** 448 * Vector type inference utilities. 449 ******************************************************************************/ 450 451 /** 452 * \brief Exposes a member typedef \p Type that names the corresponding CUDA vector type if one exists. Otherwise \p Type refers to the CubVector structure itself, which will wrap the corresponding \p x, \p y, etc. vector fields. 453 */ 454 template <typename T, int vec_elements> struct CubVector; 455 456 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 457 458 enum 459 { 460 /// The maximum number of elements in CUDA vector types 461 MAX_VEC_ELEMENTS = 4, 462 }; 463 464 465 /** 466 * Generic vector-1 type 467 */ 468 template <typename T> 469 struct CubVector<T, 1> 470 { 471 T x; 472 473 typedef T BaseType; 474 typedef CubVector<T, 1> Type; 475 }; 476 477 /** 478 * Generic vector-2 type 479 */ 480 template <typename T> 481 struct CubVector<T, 2> 482 { 483 T x; 484 T y; 485 486 typedef T BaseType; 487 typedef CubVector<T, 2> Type; 488 }; 489 490 /** 491 * Generic vector-3 type 492 */ 493 template <typename T> 494 struct CubVector<T, 3> 495 { 496 T x; 497 T y; 498 T z; 499 500 typedef T BaseType; 501 typedef CubVector<T, 3> Type; 502 }; 503 504 /** 505 * Generic vector-4 type 506 */ 507 template <typename T> 508 struct CubVector<T, 4> 509 { 510 T x; 511 T y; 512 T z; 513 T w; 514 515 typedef T BaseType; 516 typedef CubVector<T, 4> Type; 517 }; 518 519 520 /** 521 * Macro for expanding partially-specialized built-in vector types 522 */ 523 #define CUB_DEFINE_VECTOR_TYPE(base_type,short_type) \ 524 \ 525 template<> struct CubVector<base_type, 1> : short_type##1 \ 526 { \ 527 typedef base_type BaseType; \ 528 typedef short_type##1 Type; \ 529 __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ 530 CubVector retval; \ 531 retval.x = x + other.x; \ 532 return retval; \ 533 } \ 534 __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ 535 CubVector retval; \ 536 retval.x = x - other.x; \ 537 return retval; \ 538 } \ 539 }; \ 540 \ 541 template<> struct CubVector<base_type, 2> : short_type##2 \ 542 { \ 543 typedef base_type BaseType; \ 544 typedef short_type##2 Type; \ 545 __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ 546 CubVector retval; \ 547 retval.x = x + other.x; \ 548 retval.y = y + other.y; \ 549 return retval; \ 550 } \ 551 __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ 552 CubVector retval; \ 553 retval.x = x - other.x; \ 554 retval.y = y - other.y; \ 555 return retval; \ 556 } \ 557 }; \ 558 \ 559 template<> struct CubVector<base_type, 3> : short_type##3 \ 560 { \ 561 typedef base_type BaseType; \ 562 typedef short_type##3 Type; \ 563 __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ 564 CubVector retval; \ 565 retval.x = x + other.x; \ 566 retval.y = y + other.y; \ 567 retval.z = z + other.z; \ 568 return retval; \ 569 } \ 570 __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ 571 CubVector retval; \ 572 retval.x = x - other.x; \ 573 retval.y = y - other.y; \ 574 retval.z = z - other.z; \ 575 return retval; \ 576 } \ 577 }; \ 578 \ 579 template<> struct CubVector<base_type, 4> : short_type##4 \ 580 { \ 581 typedef base_type BaseType; \ 582 typedef short_type##4 Type; \ 583 __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ 584 CubVector retval; \ 585 retval.x = x + other.x; \ 586 retval.y = y + other.y; \ 587 retval.z = z + other.z; \ 588 retval.w = w + other.w; \ 589 return retval; \ 590 } \ 591 __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ 592 CubVector retval; \ 593 retval.x = x - other.x; \ 594 retval.y = y - other.y; \ 595 retval.z = z - other.z; \ 596 retval.w = w - other.w; \ 597 return retval; \ 598 } \ 599 }; 600 601 602 603 // Expand CUDA vector types for built-in primitives 604 CUB_DEFINE_VECTOR_TYPE(char, char) 605 CUB_DEFINE_VECTOR_TYPE(signed char, char) 606 CUB_DEFINE_VECTOR_TYPE(short, short) 607 CUB_DEFINE_VECTOR_TYPE(int, int) 608 CUB_DEFINE_VECTOR_TYPE(long, long) 609 CUB_DEFINE_VECTOR_TYPE(long long, longlong) 610 CUB_DEFINE_VECTOR_TYPE(unsigned char, uchar) 611 CUB_DEFINE_VECTOR_TYPE(unsigned short, ushort) 612 CUB_DEFINE_VECTOR_TYPE(unsigned int, uint) 613 CUB_DEFINE_VECTOR_TYPE(unsigned long, ulong) 614 CUB_DEFINE_VECTOR_TYPE(unsigned long long, ulonglong) 615 CUB_DEFINE_VECTOR_TYPE(float, float) 616 CUB_DEFINE_VECTOR_TYPE(double, double) 617 CUB_DEFINE_VECTOR_TYPE(bool, uchar) 618 619 // Undefine macros 620 #undef CUB_DEFINE_VECTOR_TYPE 621 622 #endif // DOXYGEN_SHOULD_SKIP_THIS 623 624 625 626 /****************************************************************************** 627 * Wrapper types 628 ******************************************************************************/ 629 630 /** 631 * \brief A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions 632 */ 633 template <typename T> 634 struct Uninitialized 635 { 636 /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T 637 typedef typename UnitWord<T>::DeviceWord DeviceWord; 638 639 enum 640 { 641 WORDS = sizeof(T) / sizeof(DeviceWord) 642 }; 643 644 /// Backing storage 645 DeviceWord storage[WORDS]; 646 647 /// Alias Aliascub::Uninitialized648 __host__ __device__ __forceinline__ T& Alias() 649 { 650 return reinterpret_cast<T&>(*this); 651 } 652 }; 653 654 655 /** 656 * \brief A key identifier paired with a corresponding value 657 */ 658 template < 659 typename _Key, 660 typename _Value 661 #if defined(_WIN32) && !defined(_WIN64) 662 , bool KeyIsLT = (AlignBytes<_Key>::ALIGN_BYTES < AlignBytes<_Value>::ALIGN_BYTES) 663 , bool ValIsLT = (AlignBytes<_Value>::ALIGN_BYTES < AlignBytes<_Key>::ALIGN_BYTES) 664 #endif // #if defined(_WIN32) && !defined(_WIN64) 665 > 666 struct KeyValuePair 667 { 668 typedef _Key Key; ///< Key data type 669 typedef _Value Value; ///< Value data type 670 671 Key key; ///< Item key 672 Value value; ///< Item value 673 674 /// Constructor 675 __host__ __device__ __forceinline__ KeyValuePaircub::KeyValuePair676 KeyValuePair() {} 677 678 /// Constructor 679 __host__ __device__ __forceinline__ KeyValuePaircub::KeyValuePair680 KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} 681 682 /// Inequality operator operator !=cub::KeyValuePair683 __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) 684 { 685 return (value != b.value) || (key != b.key); 686 } 687 }; 688 689 #if defined(_WIN32) && !defined(_WIN64) 690 691 /** 692 * Win32 won't do 16B alignment. This can present two problems for 693 * should-be-16B-aligned (but actually 8B aligned) built-in and intrinsics members: 694 * 1) If a smaller-aligned item were to be listed first, the host compiler places the 695 * should-be-16B item at too early an offset (and disagrees with device compiler) 696 * 2) Or, if a smaller-aligned item lists second, the host compiler gets the size 697 * of the struct wrong (and disagrees with device compiler) 698 * 699 * So we put the larger-should-be-aligned item first, and explicitly pad the 700 * end of the struct 701 */ 702 703 /// Smaller key specialization 704 template <typename K, typename V> 705 struct KeyValuePair<K, V, true, false> 706 { 707 typedef K Key; 708 typedef V Value; 709 710 typedef char Pad[AlignBytes<V>::ALIGN_BYTES - AlignBytes<K>::ALIGN_BYTES]; 711 712 Value value; // Value has larger would-be alignment and goes first 713 Key key; 714 Pad pad; 715 716 /// Constructor 717 __host__ __device__ __forceinline__ KeyValuePaircub::KeyValuePair718 KeyValuePair() {} 719 720 /// Constructor 721 __host__ __device__ __forceinline__ KeyValuePaircub::KeyValuePair722 KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} 723 724 /// Inequality operator operator !=cub::KeyValuePair725 __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) 726 { 727 return (value != b.value) || (key != b.key); 728 } 729 }; 730 731 732 /// Smaller value specialization 733 template <typename K, typename V> 734 struct KeyValuePair<K, V, false, true> 735 { 736 typedef K Key; 737 typedef V Value; 738 739 typedef char Pad[AlignBytes<K>::ALIGN_BYTES - AlignBytes<V>::ALIGN_BYTES]; 740 741 Key key; // Key has larger would-be alignment and goes first 742 Value value; 743 Pad pad; 744 745 /// Constructor 746 __host__ __device__ __forceinline__ KeyValuePaircub::KeyValuePair747 KeyValuePair() {} 748 749 /// Constructor 750 __host__ __device__ __forceinline__ KeyValuePaircub::KeyValuePair751 KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} 752 753 /// Inequality operator operator !=cub::KeyValuePair754 __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) 755 { 756 return (value != b.value) || (key != b.key); 757 } 758 }; 759 760 #endif // #if defined(_WIN32) && !defined(_WIN64) 761 762 763 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 764 765 766 /** 767 * \brief A wrapper for passing simple static arrays as kernel parameters 768 */ 769 template <typename T, int COUNT> 770 struct ArrayWrapper 771 { 772 773 /// Statically-sized array of type \p T 774 T array[COUNT]; 775 776 /// Constructor ArrayWrappercub::ArrayWrapper777 __host__ __device__ __forceinline__ ArrayWrapper() {} 778 }; 779 780 #endif // DOXYGEN_SHOULD_SKIP_THIS 781 782 /** 783 * \brief Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth. 784 * 785 * Many multi-pass computations require a pair of "ping-pong" storage 786 * buffers (e.g., one for reading from and the other for writing to, and then 787 * vice-versa for the subsequent pass). This structure wraps a set of device 788 * buffers and a "selector" member to track which is "current". 789 */ 790 template <typename T> 791 struct DoubleBuffer 792 { 793 /// Pair of device buffer pointers 794 T *d_buffers[2]; 795 796 /// Selector into \p d_buffers (i.e., the active/valid buffer) 797 int selector; 798 799 /// \brief Constructor DoubleBuffercub::DoubleBuffer800 __host__ __device__ __forceinline__ DoubleBuffer() 801 { 802 selector = 0; 803 d_buffers[0] = NULL; 804 d_buffers[1] = NULL; 805 } 806 807 /// \brief Constructor DoubleBuffercub::DoubleBuffer808 __host__ __device__ __forceinline__ DoubleBuffer( 809 T *d_current, ///< The currently valid buffer 810 T *d_alternate) ///< Alternate storage buffer of the same size as \p d_current 811 { 812 selector = 0; 813 d_buffers[0] = d_current; 814 d_buffers[1] = d_alternate; 815 } 816 817 /// \brief Return pointer to the currently valid buffer Currentcub::DoubleBuffer818 __host__ __device__ __forceinline__ T* Current() { return d_buffers[selector]; } 819 820 /// \brief Return pointer to the currently invalid buffer Alternatecub::DoubleBuffer821 __host__ __device__ __forceinline__ T* Alternate() { return d_buffers[selector ^ 1]; } 822 823 }; 824 825 826 827 /****************************************************************************** 828 * Typedef-detection 829 ******************************************************************************/ 830 831 832 /** 833 * \brief Defines a structure \p detector_name that is templated on type \p T. The \p detector_name struct exposes a constant member \p VALUE indicating whether or not parameter \p T exposes a nested type \p nested_type_name 834 */ 835 #define CUB_DEFINE_DETECT_NESTED_TYPE(detector_name, nested_type_name) \ 836 template <typename T> \ 837 struct detector_name \ 838 { \ 839 template <typename C> \ 840 static char& test(typename C::nested_type_name*); \ 841 template <typename> \ 842 static int& test(...); \ 843 enum \ 844 { \ 845 VALUE = sizeof(test<T>(0)) < sizeof(int) \ 846 }; \ 847 }; 848 849 850 851 /****************************************************************************** 852 * Simple enable-if (similar to Boost) 853 ******************************************************************************/ 854 855 /** 856 * \brief Simple enable-if (similar to Boost) 857 */ 858 template <bool Condition, class T = void> 859 struct EnableIf 860 { 861 /// Enable-if type for SFINAE dummy variables 862 typedef T Type; 863 }; 864 865 866 template <class T> 867 struct EnableIf<false, T> {}; 868 869 870 871 /****************************************************************************** 872 * Typedef-detection 873 ******************************************************************************/ 874 875 /** 876 * \brief Determine whether or not BinaryOp's functor is of the form <tt>bool operator()(const T& a, const T&b)</tt> or <tt>bool operator()(const T& a, const T&b, unsigned int idx)</tt> 877 */ 878 template <typename T, typename BinaryOp> 879 struct BinaryOpHasIdxParam 880 { 881 private: 882 /* 883 template <typename BinaryOpT, bool (BinaryOpT::*)(const T &a, const T &b, unsigned int idx) const> struct SFINAE1 {}; 884 template <typename BinaryOpT, bool (BinaryOpT::*)(const T &a, const T &b, unsigned int idx)> struct SFINAE2 {}; 885 template <typename BinaryOpT, bool (BinaryOpT::*)(T a, T b, unsigned int idx) const> struct SFINAE3 {}; 886 template <typename BinaryOpT, bool (BinaryOpT::*)(T a, T b, unsigned int idx)> struct SFINAE4 {}; 887 */ 888 template <typename BinaryOpT, bool (BinaryOpT::*)(const T &a, const T &b, int idx) const> struct SFINAE5 {}; 889 template <typename BinaryOpT, bool (BinaryOpT::*)(const T &a, const T &b, int idx)> struct SFINAE6 {}; 890 template <typename BinaryOpT, bool (BinaryOpT::*)(T a, T b, int idx) const> struct SFINAE7 {}; 891 template <typename BinaryOpT, bool (BinaryOpT::*)(T a, T b, int idx)> struct SFINAE8 {}; 892 /* 893 template <typename BinaryOpT> static char Test(SFINAE1<BinaryOpT, &BinaryOpT::operator()> *); 894 template <typename BinaryOpT> static char Test(SFINAE2<BinaryOpT, &BinaryOpT::operator()> *); 895 template <typename BinaryOpT> static char Test(SFINAE3<BinaryOpT, &BinaryOpT::operator()> *); 896 template <typename BinaryOpT> static char Test(SFINAE4<BinaryOpT, &BinaryOpT::operator()> *); 897 */ 898 template <typename BinaryOpT> __host__ __device__ static char Test(SFINAE5<BinaryOpT, &BinaryOpT::operator()> *); 899 template <typename BinaryOpT> __host__ __device__ static char Test(SFINAE6<BinaryOpT, &BinaryOpT::operator()> *); 900 template <typename BinaryOpT> __host__ __device__ static char Test(SFINAE7<BinaryOpT, &BinaryOpT::operator()> *); 901 template <typename BinaryOpT> __host__ __device__ static char Test(SFINAE8<BinaryOpT, &BinaryOpT::operator()> *); 902 903 template <typename BinaryOpT> static int Test(...); 904 905 public: 906 907 /// Whether the functor BinaryOp has a third <tt>unsigned int</tt> index param 908 static const bool HAS_PARAM = sizeof(Test<BinaryOp>(NULL)) == sizeof(char); 909 }; 910 911 912 913 914 /****************************************************************************** 915 * Simple type traits utilities. 916 * 917 * For example: 918 * Traits<int>::CATEGORY // SIGNED_INTEGER 919 * Traits<NullType>::NULL_TYPE // true 920 * Traits<uint4>::CATEGORY // NOT_A_NUMBER 921 * Traits<uint4>::PRIMITIVE; // false 922 * 923 ******************************************************************************/ 924 925 /** 926 * \brief Basic type traits categories 927 */ 928 enum Category 929 { 930 NOT_A_NUMBER, 931 SIGNED_INTEGER, 932 UNSIGNED_INTEGER, 933 FLOATING_POINT 934 }; 935 936 937 /** 938 * \brief Basic type traits 939 */ 940 template <Category _CATEGORY, bool _PRIMITIVE, bool _NULL_TYPE, typename _UnsignedBits, typename T> 941 struct BaseTraits 942 { 943 /// Category 944 static const Category CATEGORY = _CATEGORY; 945 enum 946 { 947 PRIMITIVE = _PRIMITIVE, 948 NULL_TYPE = _NULL_TYPE, 949 }; 950 }; 951 952 953 /** 954 * Basic type traits (unsigned primitive specialization) 955 */ 956 template <typename _UnsignedBits, typename T> 957 struct BaseTraits<UNSIGNED_INTEGER, true, false, _UnsignedBits, T> 958 { 959 typedef _UnsignedBits UnsignedBits; 960 961 static const Category CATEGORY = UNSIGNED_INTEGER; 962 static const UnsignedBits LOWEST_KEY = UnsignedBits(0); 963 static const UnsignedBits MAX_KEY = UnsignedBits(-1); 964 965 enum 966 { 967 PRIMITIVE = true, 968 NULL_TYPE = false, 969 }; 970 971 TwiddleIncub::BaseTraits972 static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) 973 { 974 return key; 975 } 976 TwiddleOutcub::BaseTraits977 static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) 978 { 979 return key; 980 } 981 Maxcub::BaseTraits982 static __host__ __device__ __forceinline__ T Max() 983 { 984 UnsignedBits retval = MAX_KEY; 985 return reinterpret_cast<T&>(retval); 986 } 987 Lowestcub::BaseTraits988 static __host__ __device__ __forceinline__ T Lowest() 989 { 990 UnsignedBits retval = LOWEST_KEY; 991 return reinterpret_cast<T&>(retval); 992 } 993 }; 994 995 996 /** 997 * Basic type traits (signed primitive specialization) 998 */ 999 template <typename _UnsignedBits, typename T> 1000 struct BaseTraits<SIGNED_INTEGER, true, false, _UnsignedBits, T> 1001 { 1002 typedef _UnsignedBits UnsignedBits; 1003 1004 static const Category CATEGORY = SIGNED_INTEGER; 1005 static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); 1006 static const UnsignedBits LOWEST_KEY = HIGH_BIT; 1007 static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; 1008 1009 enum 1010 { 1011 PRIMITIVE = true, 1012 NULL_TYPE = false, 1013 }; 1014 TwiddleIncub::BaseTraits1015 static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) 1016 { 1017 return key ^ HIGH_BIT; 1018 }; 1019 TwiddleOutcub::BaseTraits1020 static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) 1021 { 1022 return key ^ HIGH_BIT; 1023 }; 1024 Maxcub::BaseTraits1025 static __host__ __device__ __forceinline__ T Max() 1026 { 1027 UnsignedBits retval = MAX_KEY; 1028 return reinterpret_cast<T&>(retval); 1029 } 1030 Lowestcub::BaseTraits1031 static __host__ __device__ __forceinline__ T Lowest() 1032 { 1033 UnsignedBits retval = LOWEST_KEY; 1034 return reinterpret_cast<T&>(retval); 1035 } 1036 }; 1037 1038 template <typename _T> 1039 struct FpLimits; 1040 1041 template <> 1042 struct FpLimits<float> 1043 { Maxcub::FpLimits1044 static __host__ __device__ __forceinline__ float Max() { 1045 return FLT_MAX; 1046 } 1047 Lowestcub::FpLimits1048 static __host__ __device__ __forceinline__ float Lowest() { 1049 return FLT_MAX * float(-1); 1050 } 1051 }; 1052 1053 template <> 1054 struct FpLimits<double> 1055 { Maxcub::FpLimits1056 static __host__ __device__ __forceinline__ double Max() { 1057 return DBL_MAX; 1058 } 1059 Lowestcub::FpLimits1060 static __host__ __device__ __forceinline__ double Lowest() { 1061 return DBL_MAX * double(-1); 1062 } 1063 }; 1064 1065 1066 #if (__CUDACC_VER_MAJOR__ >= 9) 1067 template <> 1068 struct FpLimits<__half> 1069 { Maxcub::FpLimits1070 static __host__ __device__ __forceinline__ __half Max() { 1071 unsigned short max_word = 0x7BFF; 1072 return reinterpret_cast<__half&>(max_word); 1073 } 1074 Lowestcub::FpLimits1075 static __host__ __device__ __forceinline__ __half Lowest() { 1076 unsigned short lowest_word = 0xFBFF; 1077 return reinterpret_cast<__half&>(lowest_word); 1078 } 1079 }; 1080 #endif 1081 1082 1083 /** 1084 * Basic type traits (fp primitive specialization) 1085 */ 1086 template <typename _UnsignedBits, typename T> 1087 struct BaseTraits<FLOATING_POINT, true, false, _UnsignedBits, T> 1088 { 1089 typedef _UnsignedBits UnsignedBits; 1090 1091 static const Category CATEGORY = FLOATING_POINT; 1092 static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); 1093 static const UnsignedBits LOWEST_KEY = UnsignedBits(-1); 1094 static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; 1095 1096 enum 1097 { 1098 PRIMITIVE = true, 1099 NULL_TYPE = false, 1100 }; 1101 TwiddleIncub::BaseTraits1102 static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) 1103 { 1104 UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT; 1105 return key ^ mask; 1106 }; 1107 TwiddleOutcub::BaseTraits1108 static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) 1109 { 1110 UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1); 1111 return key ^ mask; 1112 }; 1113 Maxcub::BaseTraits1114 static __host__ __device__ __forceinline__ T Max() { 1115 return FpLimits<T>::Max(); 1116 } 1117 Lowestcub::BaseTraits1118 static __host__ __device__ __forceinline__ T Lowest() { 1119 return FpLimits<T>::Lowest(); 1120 } 1121 }; 1122 1123 1124 /** 1125 * \brief Numeric type traits 1126 */ 1127 template <typename T> struct NumericTraits : BaseTraits<NOT_A_NUMBER, false, false, T, T> {}; 1128 1129 template <> struct NumericTraits<NullType> : BaseTraits<NOT_A_NUMBER, false, true, NullType, NullType> {}; 1130 1131 template <> struct NumericTraits<char> : BaseTraits<(std::numeric_limits<char>::is_signed) ? SIGNED_INTEGER : UNSIGNED_INTEGER, true, false, unsigned char, char> {}; 1132 template <> struct NumericTraits<signed char> : BaseTraits<SIGNED_INTEGER, true, false, unsigned char, signed char> {}; 1133 template <> struct NumericTraits<short> : BaseTraits<SIGNED_INTEGER, true, false, unsigned short, short> {}; 1134 template <> struct NumericTraits<int> : BaseTraits<SIGNED_INTEGER, true, false, unsigned int, int> {}; 1135 template <> struct NumericTraits<long> : BaseTraits<SIGNED_INTEGER, true, false, unsigned long, long> {}; 1136 template <> struct NumericTraits<long long> : BaseTraits<SIGNED_INTEGER, true, false, unsigned long long, long long> {}; 1137 1138 template <> struct NumericTraits<unsigned char> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned char, unsigned char> {}; 1139 template <> struct NumericTraits<unsigned short> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned short, unsigned short> {}; 1140 template <> struct NumericTraits<unsigned int> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned int, unsigned int> {}; 1141 template <> struct NumericTraits<unsigned long> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned long, unsigned long> {}; 1142 template <> struct NumericTraits<unsigned long long> : BaseTraits<UNSIGNED_INTEGER, true, false, unsigned long long, unsigned long long> {}; 1143 1144 template <> struct NumericTraits<float> : BaseTraits<FLOATING_POINT, true, false, unsigned int, float> {}; 1145 template <> struct NumericTraits<double> : BaseTraits<FLOATING_POINT, true, false, unsigned long long, double> {}; 1146 #if (__CUDACC_VER_MAJOR__ >= 9) 1147 template <> struct NumericTraits<__half> : BaseTraits<FLOATING_POINT, true, false, unsigned short, __half> {}; 1148 #endif 1149 1150 template <> struct NumericTraits<bool> : BaseTraits<UNSIGNED_INTEGER, true, false, typename UnitWord<bool>::VolatileWord, bool> {}; 1151 1152 1153 1154 /** 1155 * \brief Type traits 1156 */ 1157 template <typename T> 1158 struct Traits : NumericTraits<typename RemoveQualifiers<T>::Type> {}; 1159 1160 1161 #endif // DOXYGEN_SHOULD_SKIP_THIS 1162 1163 1164 /** @} */ // end group UtilModule 1165 1166 } // CUB namespace 1167 CUB_NS_POSTFIX // Optional outer namespace(s) 1168