1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill.  All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the NVIDIA CORPORATION nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
29 /**
30  * \file
31  * Random-access iterator types
32  */
33 
34 #pragma once
35 
36 #include <iterator>
37 #include <iostream>
38 
39 #include "../thread/thread_load.cuh"
40 #include "../thread/thread_store.cuh"
41 #include "../util_device.cuh"
42 #include "../util_debug.cuh"
43 #include "../util_namespace.cuh"
44 
45 #if (CUDA_VERSION >= 5050) || defined(DOXYGEN_ACTIVE)  // This iterator is compatible with CUDA 5.5 and newer
46 
47 #if (THRUST_VERSION >= 100700)    // This iterator is compatible with Thrust API 1.7 and newer
48     #include <thrust/iterator/iterator_facade.h>
49     #include <thrust/iterator/iterator_traits.h>
50 #endif // THRUST_VERSION
51 
52 
53 /// Optional outer namespace(s)
54 CUB_NS_PREFIX
55 
56 /// CUB namespace
57 namespace cub {
58 
59 
60 /******************************************************************************
61  * Static file-scope Tesla/Fermi-style texture references
62  *****************************************************************************/
63 
64 #ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document
65 
66 // Anonymous namespace
67 namespace {
68 
69 /// Global texture reference specialized by type
70 template <typename T>
71 struct IteratorTexRef
72 {
73     /// And by unique ID
74     template <int UNIQUE_ID>
75     struct TexId
76     {
77         // Largest texture word we can use in device
78         typedef typename UnitWord<T>::DeviceWord DeviceWord;
79         typedef typename UnitWord<T>::TextureWord TextureWord;
80 
81         // Number of texture words per T
82         enum {
83             DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord),
84             TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord)
85         };
86 
87         // Texture reference type
88         typedef texture<TextureWord> TexRef;
89 
90         // Texture reference
91         static TexRef ref;
92 
93         /// Bind texture
BindTexturecub::__anon7d650ba20111::IteratorTexRef::TexId94         static cudaError_t BindTexture(void *d_in, size_t &offset)
95         {
96             if (d_in)
97             {
98                 cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc<TextureWord>();
99                 ref.channelDesc = tex_desc;
100                 return (CubDebug(cudaBindTexture(&offset, ref, d_in)));
101             }
102 
103             return cudaSuccess;
104         }
105 
106         /// Unbind texture
UnbindTexturecub::__anon7d650ba20111::IteratorTexRef::TexId107         static cudaError_t UnbindTexture()
108         {
109             return CubDebug(cudaUnbindTexture(ref));
110         }
111 
112         /// Fetch element
113         template <typename Distance>
Fetchcub::__anon7d650ba20111::IteratorTexRef::TexId114         static __device__ __forceinline__ T Fetch(Distance tex_offset)
115         {
116             DeviceWord temp[DEVICE_MULTIPLE];
117             TextureWord *words = reinterpret_cast<TextureWord*>(temp);
118 
119             #pragma unroll
120             for (int i = 0; i < TEXTURE_MULTIPLE; ++i)
121             {
122                 words[i] = tex1Dfetch(ref, (tex_offset * TEXTURE_MULTIPLE) + i);
123             }
124 
125             return reinterpret_cast<T&>(temp);
126         }
127     };
128 };
129 
130 // Texture reference definitions
131 template <typename  T>
132 template <int       UNIQUE_ID>
133 typename IteratorTexRef<T>::template TexId<UNIQUE_ID>::TexRef IteratorTexRef<T>::template TexId<UNIQUE_ID>::ref = 0;
134 
135 
136 } // Anonymous namespace
137 
138 
139 #endif // DOXYGEN_SHOULD_SKIP_THIS
140 
141 
142 
143 /**
144  * \addtogroup UtilIterator
145  * @{
146  */
147 
148 
149 
150 /**
151  * \brief A random-access input wrapper for dereferencing array values through texture cache.  Uses older Tesla/Fermi-style texture references.
152  *
153  * \par Overview
154  * - TexRefInputIteratorTwraps a native device pointer of type <tt>ValueType*</tt>. References
155  *   to elements are to be loaded through texture cache.
156  * - Can be used to load any data type from memory through texture cache.
157  * - Can be manipulated and exchanged within and between host and device
158  *   functions, can only be constructed within host functions, and can only be
159  *   dereferenced within device functions.
160  * - The \p UNIQUE_ID template parameter is used to statically name the underlying texture
161  *   reference.  Only one TexRefInputIteratorTinstance can be bound at any given time for a
162  *   specific combination of (1) data type \p T, (2) \p UNIQUE_ID, (3) host
163  *   thread, and (4) compilation .o unit.
164  * - With regard to nested/dynamic parallelism, TexRefInputIteratorTiterators may only be
165  *   created by the host thread and used by a top-level kernel (i.e. the one which is launched
166  *   from the host).
167  * - Compatible with Thrust API v1.7 or newer.
168  * - Compatible with CUDA toolkit v5.5 or newer.
169  *
170  * \par Snippet
171  * The code snippet below illustrates the use of \p TexRefInputIteratorTto
172  * dereference a device array of doubles through texture cache.
173  * \par
174  * \code
175  * #include <cub/cub.cuh>   // or equivalently <cub/iterator/tex_ref_input_iterator.cuh>
176  *
177  * // Declare, allocate, and initialize a device array
178  * int num_items;   // e.g., 7
179  * double *d_in;    // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0]
180  *
181  * // Create an iterator wrapper
182  * cub::TexRefInputIterator<double, __LINE__> itr;
183  * itr.BindTexture(d_in, sizeof(double) * num_items);
184  * ...
185  *
186  * // Within device code:
187  * printf("%f\n", itr[0]);      // 8.0
188  * printf("%f\n", itr[1]);      // 6.0
189  * printf("%f\n", itr[6]);      // 9.0
190  *
191  * ...
192  * itr.UnbindTexture();
193  *
194  * \endcode
195  *
196  * \tparam T                    The value type of this iterator
197  * \tparam UNIQUE_ID            A globally-unique identifier (within the compilation unit) to name the underlying texture reference
198  * \tparam OffsetT              The difference type of this iterator (Default: \p ptrdiff_t)
199  */
200 template <
201     typename    T,
202     int         UNIQUE_ID,
203     typename    OffsetT = ptrdiff_t>
204 class TexRefInputIterator
205 {
206 public:
207 
208     // Required iterator traits
209     typedef TexRefInputIterator                 self_type;              ///< My own type
210     typedef OffsetT                             difference_type;        ///< Type to express the result of subtracting one iterator from another
211     typedef T                                   value_type;             ///< The type of the element the iterator can point to
212     typedef T*                                  pointer;                ///< The type of a pointer to an element the iterator can point to
213     typedef T                                   reference;              ///< The type of a reference to an element the iterator can point to
214 
215 #if (THRUST_VERSION >= 100700)
216     // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
217     typedef typename thrust::detail::iterator_facade_category<
218         thrust::device_system_tag,
219         thrust::random_access_traversal_tag,
220         value_type,
221         reference
222       >::type iterator_category;                                        ///< The iterator category
223 #else
224     typedef std::random_access_iterator_tag     iterator_category;      ///< The iterator category
225 #endif  // THRUST_VERSION
226 
227 private:
228 
229     T*              ptr;
230     difference_type tex_offset;
231 
232     // Texture reference wrapper (old Tesla/Fermi-style textures)
233     typedef typename IteratorTexRef<T>::template TexId<UNIQUE_ID> TexId;
234 
235 public:
236 /*
237     /// Constructor
238     __host__ __device__ __forceinline__ TexRefInputIterator()
239     :
240         ptr(NULL),
241         tex_offset(0)
242     {}
243 */
244     /// Use this iterator to bind \p ptr with a texture reference
245     template <typename QualifiedT>
BindTexture(QualifiedT * ptr,size_t bytes=size_t (-1),size_t tex_offset=0)246     cudaError_t BindTexture(
247         QualifiedT      *ptr,                   ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment
248         size_t          bytes = size_t(-1),     ///< Number of bytes in the range
249         size_t          tex_offset = 0)         ///< OffsetT (in items) from \p ptr denoting the position of the iterator
250     {
251         this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr);
252         size_t offset;
253         cudaError_t retval = TexId::BindTexture(this->ptr + tex_offset, offset);
254         this->tex_offset = (difference_type) (offset / sizeof(QualifiedT));
255         return retval;
256     }
257 
258     /// Unbind this iterator from its texture reference
UnbindTexture()259     cudaError_t UnbindTexture()
260     {
261         return TexId::UnbindTexture();
262     }
263 
264     /// Postfix increment
operator ++(int)265     __host__ __device__ __forceinline__ self_type operator++(int)
266     {
267         self_type retval = *this;
268         tex_offset++;
269         return retval;
270     }
271 
272     /// Prefix increment
operator ++()273     __host__ __device__ __forceinline__ self_type operator++()
274     {
275         tex_offset++;
276         return *this;
277     }
278 
279     /// Indirection
operator *() const280     __host__ __device__ __forceinline__ reference operator*() const
281     {
282 #if (CUB_PTX_ARCH == 0)
283         // Simply dereference the pointer on the host
284         return ptr[tex_offset];
285 #else
286         // Use the texture reference
287         return TexId::Fetch(tex_offset);
288 #endif
289     }
290 
291     /// Addition
292     template <typename Distance>
operator +(Distance n) const293     __host__ __device__ __forceinline__ self_type operator+(Distance n) const
294     {
295         self_type retval;
296         retval.ptr = ptr;
297         retval.tex_offset = tex_offset + n;
298         return retval;
299     }
300 
301     /// Addition assignment
302     template <typename Distance>
operator +=(Distance n)303     __host__ __device__ __forceinline__ self_type& operator+=(Distance n)
304     {
305         tex_offset += n;
306         return *this;
307     }
308 
309     /// Subtraction
310     template <typename Distance>
operator -(Distance n) const311     __host__ __device__ __forceinline__ self_type operator-(Distance n) const
312     {
313         self_type retval;
314         retval.ptr = ptr;
315         retval.tex_offset = tex_offset - n;
316         return retval;
317     }
318 
319     /// Subtraction assignment
320     template <typename Distance>
operator -=(Distance n)321     __host__ __device__ __forceinline__ self_type& operator-=(Distance n)
322     {
323         tex_offset -= n;
324         return *this;
325     }
326 
327     /// Distance
operator -(self_type other) const328     __host__ __device__ __forceinline__ difference_type operator-(self_type other) const
329     {
330         return tex_offset - other.tex_offset;
331     }
332 
333     /// Array subscript
334     template <typename Distance>
operator [](Distance n) const335     __host__ __device__ __forceinline__ reference operator[](Distance n) const
336     {
337         self_type offset = (*this) + n;
338         return *offset;
339     }
340 
341     /// Structure dereference
operator ->()342     __host__ __device__ __forceinline__ pointer operator->()
343     {
344         return &(*(*this));
345     }
346 
347     /// Equal to
operator ==(const self_type & rhs)348     __host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
349     {
350         return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset));
351     }
352 
353     /// Not equal to
operator !=(const self_type & rhs)354     __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
355     {
356         return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset));
357     }
358 
359     /// ostream operator
operator <<(std::ostream & os,const self_type & itr)360     friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
361     {
362         return os;
363     }
364 
365 };
366 
367 
368 
369 /** @} */       // end group UtilIterator
370 
371 }               // CUB namespace
372 CUB_NS_POSTFIX  // Optional outer namespace(s)
373 
374 #endif // CUDA_VERSION
375