1 /******************************************************************************
2 * Copyright (c) 2011, Duane Merrill. All rights reserved.
3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the NVIDIA CORPORATION nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 ******************************************************************************/
28
29 /******************************************************************************
30 * Simple example of DevicePartition::If().
31 *
32 * Partitions items from a sequence of int keys using a
33 * selection functor (greater-than)
34 *
35 * To compile using the command line:
36 * nvcc -arch=sm_XX example_device_select_if.cu -I../.. -lcudart -O3
37 *
38 ******************************************************************************/
39
40 // Ensure printing of CUDA runtime errors to console
41 #define CUB_STDERR
42
43 #include <stdio.h>
44
45 #include <cub/util_allocator.cuh>
46 #include <cub/device/device_partition.cuh>
47
48 #include "../../test/test_util.h"
49
50 using namespace cub;
51
52
53 //---------------------------------------------------------------------
54 // Globals, constants and typedefs
55 //---------------------------------------------------------------------
56
// Verbosity switch: set from the "--v" command-line flag in main(). When true,
// Initialize() prints the generated input, and the flag is also forwarded to
// CompareDeviceResults().
57 bool g_verbose = false; // Whether to display input/output to console
// Allocator used by main() for every device allocation and free in this example.
// NOTE(review): the `true` argument presumably selects the allocator's
// skip-cleanup behaviour -- confirm against cub/util_allocator.cuh.
58 CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
59
60
/// Selection functor type: accepts values strictly greater than a stored threshold
struct GreaterThan
{
    int compare;    ///< Threshold that candidate values are tested against

    /// Store the threshold used by subsequent calls
    __host__ __device__ __forceinline__
    GreaterThan(int threshold) : compare(threshold) {}

    /// Returns true when \p value exceeds the stored threshold
    __host__ __device__ __forceinline__
    bool operator()(const int &value) const
    {
        return compare < value;
    }
};
74
75
76 //---------------------------------------------------------------------
77 // Test generation
78 //---------------------------------------------------------------------
79
/**
 * Initialize problem, filling h_in with runs of identical keys.
 *
 * Keys start at 0 and increase by one per run.  Each run length is derived
 * from 16 random bits scaled onto [0..max_segment] and then clamped to at
 * least 1; the final run is truncated so that exactly num_items entries are
 * written.  A non-positive max_segment therefore yields runs of length 1.
 *
 * When g_verbose is set, the generated input is printed.
 *
 * @param h_in         Host array with room for at least num_items ints
 * @param num_items    Number of items to generate (nothing is written if <= 0)
 * @param max_segment  Maximum run length
 */
void Initialize(
    int *h_in,
    int num_items,
    int max_segment)
{
    // Loop-invariant factor that maps a random 16-bit value onto [0..max_segment]
    const unsigned short max_short = (unsigned short) -1;
    const float scale = float(max_segment) / float(max_short);

    int key = 0;
    int i = 0;
    while (i < num_items)
    {
        // Draw the random bits for this run
        unsigned short bits;
        RandomBits(bits);
        const float scaled = float(bits) * scale;

        // Clamp in the floating-point domain *before* converting to an integer:
        // converting an out-of-range float (max_segment > 65535 or negative) to
        // an integer type is undefined behaviour.  Clamping against the number
        // of remaining items also avoids overflowing (i + run_length).
        const int remaining = num_items - i;
        int run_length;
        if (!(scaled >= 1.0f))
        {
            run_length = 1;                 // Every run holds at least one item
        }
        else if (scaled >= float(remaining))
        {
            run_length = remaining;         // Final run: truncate to fit the array
        }
        else
        {
            run_length = int(scaled);       // In range: truncate toward zero
        }

        // Emit the run
        for (int j = 0; j < run_length; ++j)
        {
            h_in[i + j] = key;
        }

        i += run_length;
        key++;
    }

    if (g_verbose)
    {
        printf("Input:\n");
        DisplayResults(h_in, num_items);
        printf("\n\n");
    }
}
117
118
/**
 * Compute the host reference for the partition problem.
 *
 * Items of h_in for which select_op returns true are written to the front of
 * h_reference in their input order; all other items are written from the back
 * of h_reference towards the front (i.e. in reverse input order).
 *
 * @param h_in         Input items
 * @param select_op    Unary predicate invoked once per input item
 * @param h_reference  Output array with room for num_items ints
 * @param num_items    Number of input items
 * @return             Number of items for which select_op returned true
 */
template <typename SelectOp>
int Solve(
    int *h_in,
    SelectOp select_op,
    int *h_reference,
    int num_items)
{
    int front = 0;                  // Next free slot for a selected item
    int back = num_items - 1;       // Next free slot for a rejected item

    for (int idx = 0; idx < num_items; ++idx)
    {
        const int item = h_in[idx];
        if (!select_op(h_in[idx]))
        {
            // Rejected items grow downwards from the end of the output
            h_reference[back] = item;
            --back;
            continue;
        }

        // Selected items grow upwards from the start of the output
        h_reference[front] = item;
        ++front;
    }

    return front;
}
145
146
147 //---------------------------------------------------------------------
148 // Main
149 //---------------------------------------------------------------------
150
/**
 * Main
 *
 * Generates a random input sequence, picks a pivot value from it, computes a
 * host reference partition with Solve(), runs cub::DevicePartition::If on the
 * device with the same GreaterThan predicate, and compares both the partitioned
 * data and the selected-item count against the reference.
 *
 * Command-line options: --n=<items>, --maxseg=<max run length>, --device=<id>,
 * --v (verbose), --help.
 *
 * @return 0 on success; exits with status 1 when --n is smaller than 1
 */
int main(int argc, char** argv)
{
    int num_items = 150;
    int max_segment = 40;       // Maximum segment length

    // Initialize command line
    CommandLineArgs args(argc, argv);
    g_verbose = args.CheckCmdLineFlag("v");
    args.GetCmdLineArgument("n", num_items);
    args.GetCmdLineArgument("maxseg", max_segment);

    // Print usage
    if (args.CheckCmdLineFlag("help"))
    {
        printf("%s "
            "[--n=<input items>] "
            "[--device=<device-id>] "
            "[--maxseg=<max segment length>] "
            "[--v] "
            "\n", argv[0]);
        exit(0);
    }

    // The pivot value is read from the input, so at least one item is required.
    // (A non-positive count would also index h_in out of bounds below.)
    if (num_items < 1)
    {
        fprintf(stderr, "Invalid --n=%d: at least one input item is required\n", num_items);
        exit(1);
    }

    // Initialize device
    CubDebugExit(args.DeviceInit());

    // Allocate host arrays
    int *h_in        = new int[num_items];
    int *h_reference = new int[num_items];

    // Select a pivot index by scaling random bits onto [0..num_items-1]
    unsigned int pivot_index;
    unsigned int max_int = (unsigned int) -1;
    RandomBits(pivot_index);
    pivot_index = (unsigned int) ((float(pivot_index) * (float(num_items - 1) / float(max_int))));

    // float(num_items - 1) may round upwards for very large inputs; keep the index in bounds
    const unsigned int last_index = (unsigned int) (num_items - 1);
    if (pivot_index > last_index)
    {
        pivot_index = last_index;
    }
    printf("Pivot idx: %u\n", pivot_index); fflush(stdout);

    // Initialize problem and solution
    Initialize(h_in, num_items, max_segment);
    GreaterThan select_op(h_in[pivot_index]);

    int num_selected = Solve(h_in, select_op, h_reference, num_items);

    printf("cub::DevicePartition::If %d items, %d selected (avg run length %d), %d-byte elements\n",
        num_items, num_selected, (num_selected > 0) ? num_items / num_selected : 0, (int) sizeof(int));
    fflush(stdout);

    // Allocate problem device arrays
    int *d_in = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items));

    // Initialize device input
    CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice));

    // Allocate device output array and num selected
    int *d_out = NULL;
    int *d_num_selected_out = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * num_items));
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int)));

    // Allocate temporary storage: the first call is made with a NULL
    // d_temp_storage and is followed by an allocation of temp_storage_bytes
    void *d_temp_storage = NULL;
    size_t temp_storage_bytes = 0;
    CubDebugExit(DevicePartition::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op));
    CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes));

    // Run
    CubDebugExit(DevicePartition::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op));

    // Check for correctness (and display results, if specified).
    // A non-zero comparison result is reported as FAIL.
    int compare = CompareDeviceResults(h_reference, d_out, num_items, true, g_verbose);
    printf("\t Data %s ", compare ? "FAIL" : "PASS");
    compare = compare | CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose);
    printf("\t Count %s ", compare ? "FAIL" : "PASS");
    AssertEquals(0, compare);

    // Cleanup
    if (h_in) delete[] h_in;
    if (h_reference) delete[] h_reference;
    if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in));
    if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out));
    if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out));
    if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage));

    printf("\n\n");

    return 0;
}
242
243
244
245