1 /****************************************************************************** 2 * Copyright (c) 2011, Duane Merrill. All rights reserved. 3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * * Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * * Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * * Neither the name of the NVIDIA CORPORATION nor the 13 * names of its contributors may be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 ******************************************************************************/ 28 29 /** 30 * \file 31 * Random-access iterator types 32 */ 33 34 #pragma once 35 36 #include <iterator> 37 #include <iostream> 38 39 #include "../thread/thread_load.cuh" 40 #include "../thread/thread_store.cuh" 41 #include "../util_device.cuh" 42 #include "../util_debug.cuh" 43 #include "../util_namespace.cuh" 44 45 #if (CUDA_VERSION >= 5050) || defined(DOXYGEN_ACTIVE) // This iterator is compatible with CUDA 5.5 and newer 46 47 #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer 48 #include <thrust/iterator/iterator_facade.h> 49 #include <thrust/iterator/iterator_traits.h> 50 #endif // THRUST_VERSION 51 52 53 /// Optional outer namespace(s) 54 CUB_NS_PREFIX 55 56 /// CUB namespace 57 namespace cub { 58 59 60 /****************************************************************************** 61 * Static file-scope Tesla/Fermi-style texture references 62 *****************************************************************************/ 63 64 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 65 66 // Anonymous namespace 67 namespace { 68 69 /// Global texture reference specialized by type 70 template <typename T> 71 struct IteratorTexRef 72 { 73 /// And by unique ID 74 template <int UNIQUE_ID> 75 struct TexId 76 { 77 // Largest texture word we can use in device 78 typedef typename UnitWord<T>::DeviceWord DeviceWord; 79 typedef typename UnitWord<T>::TextureWord TextureWord; 80 81 // Number of texture words per T 82 enum { 83 DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord), 84 TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) 85 }; 86 87 // Texture reference type 88 typedef texture<TextureWord> TexRef; 89 90 // Texture reference 91 static TexRef ref; 92 93 /// Bind texture BindTexturecub::__anon7d650ba20111::IteratorTexRef::TexId94 static cudaError_t BindTexture(void *d_in, size_t &offset) 95 { 96 if (d_in) 97 { 98 cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc<TextureWord>(); 99 ref.channelDesc = tex_desc; 100 return (CubDebug(cudaBindTexture(&offset, ref, d_in))); 101 } 102 103 return cudaSuccess; 104 } 105 106 /// Unbind texture UnbindTexturecub::__anon7d650ba20111::IteratorTexRef::TexId107 static cudaError_t UnbindTexture() 108 { 109 return CubDebug(cudaUnbindTexture(ref)); 110 } 111 112 /// Fetch element 113 template <typename Distance> Fetchcub::__anon7d650ba20111::IteratorTexRef::TexId114 static __device__ __forceinline__ T Fetch(Distance tex_offset) 115 { 116 DeviceWord temp[DEVICE_MULTIPLE]; 117 TextureWord *words = reinterpret_cast<TextureWord*>(temp); 118 119 #pragma unroll 120 for (int i = 0; i < TEXTURE_MULTIPLE; ++i) 121 { 122 words[i] = tex1Dfetch(ref, (tex_offset * TEXTURE_MULTIPLE) + i); 123 } 124 125 return reinterpret_cast<T&>(temp); 126 } 127 }; 128 }; 129 130 // Texture reference definitions 131 template <typename T> 132 template <int UNIQUE_ID> 133 typename IteratorTexRef<T>::template TexId<UNIQUE_ID>::TexRef IteratorTexRef<T>::template TexId<UNIQUE_ID>::ref = 0; 134 135 136 } // Anonymous namespace 137 138 139 #endif // DOXYGEN_SHOULD_SKIP_THIS 140 141 142 143 /** 144 * \addtogroup UtilIterator 145 * @{ 146 */ 147 148 149 150 /** 151 * \brief A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references. 152 * 153 * \par Overview 154 * - TexRefInputIteratorTwraps a native device pointer of type <tt>ValueType*</tt>. References 155 * to elements are to be loaded through texture cache. 156 * - Can be used to load any data type from memory through texture cache. 157 * - Can be manipulated and exchanged within and between host and device 158 * functions, can only be constructed within host functions, and can only be 159 * dereferenced within device functions. 160 * - The \p UNIQUE_ID template parameter is used to statically name the underlying texture 161 * reference. Only one TexRefInputIteratorTinstance can be bound at any given time for a 162 * specific combination of (1) data type \p T, (2) \p UNIQUE_ID, (3) host 163 * thread, and (4) compilation .o unit. 164 * - With regard to nested/dynamic parallelism, TexRefInputIteratorTiterators may only be 165 * created by the host thread and used by a top-level kernel (i.e. the one which is launched 166 * from the host). 167 * - Compatible with Thrust API v1.7 or newer. 168 * - Compatible with CUDA toolkit v5.5 or newer. 169 * 170 * \par Snippet 171 * The code snippet below illustrates the use of \p TexRefInputIteratorTto 172 * dereference a device array of doubles through texture cache. 173 * \par 174 * \code 175 * #include <cub/cub.cuh> // or equivalently <cub/iterator/tex_ref_input_iterator.cuh> 176 * 177 * // Declare, allocate, and initialize a device array 178 * int num_items; // e.g., 7 179 * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] 180 * 181 * // Create an iterator wrapper 182 * cub::TexRefInputIterator<double, __LINE__> itr; 183 * itr.BindTexture(d_in, sizeof(double) * num_items); 184 * ... 185 * 186 * // Within device code: 187 * printf("%f\n", itr[0]); // 8.0 188 * printf("%f\n", itr[1]); // 6.0 189 * printf("%f\n", itr[6]); // 9.0 190 * 191 * ... 192 * itr.UnbindTexture(); 193 * 194 * \endcode 195 * 196 * \tparam T The value type of this iterator 197 * \tparam UNIQUE_ID A globally-unique identifier (within the compilation unit) to name the underlying texture reference 198 * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) 199 */ 200 template < 201 typename T, 202 int UNIQUE_ID, 203 typename OffsetT = ptrdiff_t> 204 class TexRefInputIterator 205 { 206 public: 207 208 // Required iterator traits 209 typedef TexRefInputIterator self_type; ///< My own type 210 typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another 211 typedef T value_type; ///< The type of the element the iterator can point to 212 typedef T* pointer; ///< The type of a pointer to an element the iterator can point to 213 typedef T reference; ///< The type of a reference to an element the iterator can point to 214 215 #if (THRUST_VERSION >= 100700) 216 // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods 217 typedef typename thrust::detail::iterator_facade_category< 218 thrust::device_system_tag, 219 thrust::random_access_traversal_tag, 220 value_type, 221 reference 222 >::type iterator_category; ///< The iterator category 223 #else 224 typedef std::random_access_iterator_tag iterator_category; ///< The iterator category 225 #endif // THRUST_VERSION 226 227 private: 228 229 T* ptr; 230 difference_type tex_offset; 231 232 // Texture reference wrapper (old Tesla/Fermi-style textures) 233 typedef typename IteratorTexRef<T>::template TexId<UNIQUE_ID> TexId; 234 235 public: 236 /* 237 /// Constructor 238 __host__ __device__ __forceinline__ TexRefInputIterator() 239 : 240 ptr(NULL), 241 tex_offset(0) 242 {} 243 */ 244 /// Use this iterator to bind \p ptr with a texture reference 245 template <typename QualifiedT> BindTexture(QualifiedT * ptr,size_t bytes=size_t (-1),size_t tex_offset=0)246 cudaError_t BindTexture( 247 QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment 248 size_t bytes = size_t(-1), ///< Number of bytes in the range 249 size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator 250 { 251 this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr); 252 size_t offset; 253 cudaError_t retval = TexId::BindTexture(this->ptr + tex_offset, offset); 254 this->tex_offset = (difference_type) (offset / sizeof(QualifiedT)); 255 return retval; 256 } 257 258 /// Unbind this iterator from its texture reference UnbindTexture()259 cudaError_t UnbindTexture() 260 { 261 return TexId::UnbindTexture(); 262 } 263 264 /// Postfix increment operator ++(int)265 __host__ __device__ __forceinline__ self_type operator++(int) 266 { 267 self_type retval = *this; 268 tex_offset++; 269 return retval; 270 } 271 272 /// Prefix increment operator ++()273 __host__ __device__ __forceinline__ self_type operator++() 274 { 275 tex_offset++; 276 return *this; 277 } 278 279 /// Indirection operator *() const280 __host__ __device__ __forceinline__ reference operator*() const 281 { 282 #if (CUB_PTX_ARCH == 0) 283 // Simply dereference the pointer on the host 284 return ptr[tex_offset]; 285 #else 286 // Use the texture reference 287 return TexId::Fetch(tex_offset); 288 #endif 289 } 290 291 /// Addition 292 template <typename Distance> operator +(Distance n) const293 __host__ __device__ __forceinline__ self_type operator+(Distance n) const 294 { 295 self_type retval; 296 retval.ptr = ptr; 297 retval.tex_offset = tex_offset + n; 298 return retval; 299 } 300 301 /// Addition assignment 302 template <typename Distance> operator +=(Distance n)303 __host__ __device__ __forceinline__ self_type& operator+=(Distance n) 304 { 305 tex_offset += n; 306 return *this; 307 } 308 309 /// Subtraction 310 template <typename Distance> operator -(Distance n) const311 __host__ __device__ __forceinline__ self_type operator-(Distance n) const 312 { 313 self_type retval; 314 retval.ptr = ptr; 315 retval.tex_offset = tex_offset - n; 316 return retval; 317 } 318 319 /// Subtraction assignment 320 template <typename Distance> operator -=(Distance n)321 __host__ __device__ __forceinline__ self_type& operator-=(Distance n) 322 { 323 tex_offset -= n; 324 return *this; 325 } 326 327 /// Distance operator -(self_type other) const328 __host__ __device__ __forceinline__ difference_type operator-(self_type other) const 329 { 330 return tex_offset - other.tex_offset; 331 } 332 333 /// Array subscript 334 template <typename Distance> operator [](Distance n) const335 __host__ __device__ __forceinline__ reference operator[](Distance n) const 336 { 337 self_type offset = (*this) + n; 338 return *offset; 339 } 340 341 /// Structure dereference operator ->()342 __host__ __device__ __forceinline__ pointer operator->() 343 { 344 return &(*(*this)); 345 } 346 347 /// Equal to operator ==(const self_type & rhs)348 __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) 349 { 350 return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset)); 351 } 352 353 /// Not equal to operator !=(const self_type & rhs)354 __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) 355 { 356 return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset)); 357 } 358 359 /// ostream operator operator <<(std::ostream & os,const self_type & itr)360 friend std::ostream& operator<<(std::ostream& os, const self_type& itr) 361 { 362 return os; 363 } 364 365 }; 366 367 368 369 /** @} */ // end group UtilIterator 370 371 } // CUB namespace 372 CUB_NS_POSTFIX // Optional outer namespace(s) 373 374 #endif // CUDA_VERSION 375