1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill.  All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the NVIDIA CORPORATION nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
29 /******************************************************************************
30  * Simple example of DevicePartition::If().
31  *
 * Partitions items from a sequence of int keys using a
 * selection functor (greater-than)
34  *
35  * To compile using the command line:
 *   nvcc -arch=sm_XX example_device_partition_if.cu -I../.. -lcudart -O3
37  *
38  ******************************************************************************/
39 
40 // Ensure printing of CUDA runtime errors to console
41 #define CUB_STDERR
42 
43 #include <stdio.h>
44 
45 #include <cub/util_allocator.cuh>
46 #include <cub/device/device_partition.cuh>
47 
48 #include "../../test/test_util.h"
49 
50 using namespace cub;
51 
52 
53 //---------------------------------------------------------------------
54 // Globals, constants and typedefs
55 //---------------------------------------------------------------------
56 
// When true, input and output sequences are printed to the console
// (set from the --v command-line flag in main).
bool g_verbose = false;

// Caching allocator used for every device-memory allocation in this example.
// NOTE(review): the `true` constructor argument is presumably CUB's
// skip-cleanup flag -- confirm against cub/util_allocator.cuh.
CachingDeviceAllocator g_allocator(true);
59 
60 
/**
 * Selection functor: an item is selected when it is strictly greater than
 * the stored threshold.
 */
struct GreaterThan
{
    /// Threshold that items are compared against
    int compare;

    /// Construct a functor that selects items greater than \p threshold
    __host__ __device__ __forceinline__
    GreaterThan(int threshold)
    :
        compare(threshold)
    {}

    /// Returns true when \p a is strictly greater than the threshold
    __host__ __device__ __forceinline__
    bool operator()(const int &a) const
    {
        return compare < a;
    }
};
74 
75 
76 //---------------------------------------------------------------------
77 // Test generation
78 //---------------------------------------------------------------------
79 
/**
 * Fill the host input with runs of equal keys.
 *
 * Keys start at 0 and increase by one per run.  Each run's length is drawn
 * from RandomBits, scaled by max_segment and clamped to at least 1; the
 * final run is truncated so that exactly num_items entries are written.
 * When g_verbose is set, the generated input is printed.
 *
 * \param h_in         Host array to fill (at least num_items entries)
 * \param num_items    Number of entries to generate
 * \param max_segment  Upper bound used to scale the random run length
 */
void Initialize(
    int     *h_in,
    int     num_items,
    int     max_segment)
{
    // Largest value representable by the random run-length type
    const unsigned short max_short = (unsigned short) -1;

    int key = 0;
    for (int run_begin = 0; run_begin < num_items; ++key)
    {
        // Draw a random 16-bit value and rescale it to the requested range
        unsigned short run_length;
        RandomBits(run_length);
        run_length = (unsigned short) ((float(run_length) * (float(max_segment) / float(max_short))));
        run_length = CUB_MAX(1, run_length);

        // Write the current key over the run, stopping at the end of the array
        const int run_end = CUB_MIN(run_begin + run_length, num_items);
        for (; run_begin < run_end; ++run_begin)
        {
            h_in[run_begin] = key;
        }
    }

    if (!g_verbose)
        return;

    printf("Input:\n");
    DisplayResults(h_in, num_items);
    printf("\n\n");
}
117 
118 
/**
 * Compute the reference partition on the host.
 *
 * Items for which select_op returns true are written to the front of
 * h_reference in their input order; all other items are written from the
 * back of h_reference towards the front (i.e. in reverse input order).
 *
 * \param h_in         Host input items
 * \param select_op    Unary predicate deciding whether an item is selected
 * \param h_reference  Host output array (num_items entries)
 * \param num_items    Number of input items
 * \return             Number of selected items
 */
template <typename SelectOp>
int Solve(
    int             *h_in,
    SelectOp        select_op,
    int             *h_reference,
    int             num_items)
{
    int front = 0;              // Next free slot for a selected item
    int back  = num_items - 1;  // Next free slot for a rejected item

    for (int idx = 0; idx < num_items; ++idx)
    {
        if (select_op(h_in[idx]))
            h_reference[front++] = h_in[idx];
        else
            h_reference[back--] = h_in[idx];
    }

    return front;
}
145 
146 
147 //---------------------------------------------------------------------
148 // Main
149 //---------------------------------------------------------------------
150 
/**
 * Main
 *
 * Generates a random run-length input on the host, picks a pivot value from
 * it, computes the reference partition with Solve(), runs
 * cub::DevicePartition::If on the device with the same GreaterThan functor,
 * and compares both the partitioned data and the selected-item count
 * against the reference.
 *
 * Command-line options: --n=<input items>, --maxseg=<max segment length>,
 * --device=<device-id>, --v (verbose), --help.
 *
 * Returns 0 on completion; exits with status 1 if --n is less than 1.
 */
int main(int argc, char** argv)
{
    int num_items           = 150;
    int max_segment         = 40;       // Maximum segment length

    // Initialize command line
    CommandLineArgs args(argc, argv);
    g_verbose = args.CheckCmdLineFlag("v");
    args.GetCmdLineArgument("n", num_items);
    args.GetCmdLineArgument("maxseg", max_segment);

    // Print usage
    if (args.CheckCmdLineFlag("help"))
    {
        printf("%s "
            "[--n=<input items>] "
            "[--device=<device-id>] "
            "[--maxseg=<max segment length>] "
            "[--v] "
            "\n", argv[0]);
        exit(0);
    }

    // A pivot element is read from the input below, so at least one item is
    // required; reject empty or negative sizes before allocating anything.
    if (num_items < 1)
    {
        fprintf(stderr, "Error: --n must be at least 1 (got %d)\n", num_items);
        exit(1);
    }

    // Initialize device
    CubDebugExit(args.DeviceInit());

    // Allocate host arrays
    int *h_in        = new int[num_items];
    int *h_reference = new int[num_items];

    // Select a pivot index by scaling random bits into [0, num_items - 1]
    unsigned int pivot_index;
    unsigned int max_int = (unsigned int) -1;
    RandomBits(pivot_index);
    pivot_index = (unsigned int) ((float(pivot_index) * (float(num_items - 1) / float(max_int))));

    // Single-precision rounding can push the scaled value past the last valid
    // index when num_items is very large; clamp so h_in[pivot_index] is in bounds.
    if (pivot_index > (unsigned int) (num_items - 1))
        pivot_index = (unsigned int) (num_items - 1);

    printf("Pivot idx: %u\n", pivot_index); fflush(stdout);

    // Initialize problem and solution
    Initialize(h_in, num_items, max_segment);
    GreaterThan select_op(h_in[pivot_index]);

    int num_selected = Solve(h_in, select_op, h_reference, num_items);

    printf("cub::DevicePartition::If %d items, %d selected (avg run length %d), %d-byte elements\n",
        num_items, num_selected, (num_selected > 0) ? num_items / num_selected : 0, (int) sizeof(int));
    fflush(stdout);

    // Allocate problem device arrays
    int *d_in = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items));

    // Initialize device input
    CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice));

    // Allocate device output array and num selected
    int     *d_out                = NULL;
    int     *d_num_selected_out   = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * num_items));
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int)));

    // Allocate temporary storage: the first call is made with a NULL
    // d_temp_storage so that temp_storage_bytes receives the required size
    void            *d_temp_storage = NULL;
    size_t          temp_storage_bytes = 0;
    CubDebugExit(DevicePartition::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op));
    CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes));

    // Run
    CubDebugExit(DevicePartition::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op));

    // Check for correctness (and display results, if specified).
    // A non-zero comparison result is reported as FAIL; the count check is
    // OR-ed in so that either mismatch trips the final assertion.
    int compare = CompareDeviceResults(h_reference, d_out, num_items, true, g_verbose);
    printf("\t Data %s ", compare ? "FAIL" : "PASS");
    compare = compare | CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose);
    printf("\t Count %s ", compare ? "FAIL" : "PASS");
    AssertEquals(0, compare);

    // Cleanup
    delete[] h_in;
    delete[] h_reference;
    if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in));
    if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out));
    if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out));
    if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage));

    printf("\n\n");

    return 0;
}
242 
243 
244 
245