1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill.  All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the NVIDIA CORPORATION nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
29 /******************************************************************************
30  * Simple example of DevicePartition::Flagged().
31  *
32  * Partition flagged items from from a sequence of int keys using a
33  * corresponding sequence of unsigned char flags.
34  *
35  * To compile using the command line:
36  *   nvcc -arch=sm_XX example_device_partition_flagged.cu -I../.. -lcudart -O3
37  *
38  ******************************************************************************/
39 
40 // Ensure printing of CUDA runtime errors to console
41 #define CUB_STDERR
42 
43 #include <stdio.h>
44 
45 #include <cub/util_allocator.cuh>
46 #include <cub/device/device_partition.cuh>
47 
48 #include "../../test/test_util.h"
49 
50 using namespace cub;
51 
52 
53 //---------------------------------------------------------------------
54 // Globals, constants and typedefs
55 //---------------------------------------------------------------------
56 
57 bool                    g_verbose = false;  // Whether to display input/output to console
58 CachingDeviceAllocator  g_allocator(true);  // Caching allocator for device memory
59 
60 
61 //---------------------------------------------------------------------
62 // Test generation
63 //---------------------------------------------------------------------
64 
65 
66 /**
67  * Initialize problem, setting flags at distances of random length
68  * chosen from [1..max_segment]
69  */
Initialize(int * h_in,unsigned char * h_flags,int num_items,int max_segment)70 void Initialize(
71     int             *h_in,
72     unsigned char   *h_flags,
73     int             num_items,
74     int             max_segment)
75 {
76     unsigned short max_short = (unsigned short) -1;
77 
78     int key = 0;
79     int i = 0;
80     while (i < num_items)
81     {
82         // Select number of repeating occurrences
83         unsigned short repeat;
84         RandomBits(repeat);
85         repeat = (unsigned short) ((float(repeat) * (float(max_segment) / float(max_short))));
86         repeat = CUB_MAX(1, repeat);
87 
88         int j = i;
89         while (j < CUB_MIN(i + repeat, num_items))
90         {
91             h_flags[j] = 0;
92             h_in[j] = key;
93             j++;
94         }
95 
96         h_flags[i] = 1;
97         i = j;
98         key++;
99     }
100 
101     if (g_verbose)
102     {
103         printf("Input:\n");
104         DisplayResults(h_in, num_items);
105         printf("Flags:\n");
106         DisplayResults(h_flags, num_items);
107         printf("\n\n");
108     }
109 }
110 
111 
112 /**
113  * Solve unique problem
114  */
Solve(int * h_in,unsigned char * h_flags,int * h_reference,int num_items)115 int Solve(
116     int             *h_in,
117     unsigned char   *h_flags,
118     int             *h_reference,
119     int             num_items)
120 {
121     int num_selected = 0;
122     for (int i = 0; i < num_items; ++i)
123     {
124         if (h_flags[i])
125         {
126             h_reference[num_selected] = h_in[i];
127             num_selected++;
128         }
129         else
130         {
131             h_reference[num_items - (i - num_selected) - 1] = h_in[i];
132         }
133     }
134 
135     return num_selected;
136 }
137 
138 
139 //---------------------------------------------------------------------
140 // Main
141 //---------------------------------------------------------------------
142 
143 /**
144  * Main
145  */
main(int argc,char ** argv)146 int main(int argc, char** argv)
147 {
148     int num_items           = 150;
149     int max_segment         = 40;       // Maximum segment length
150 
151     // Initialize command line
152     CommandLineArgs args(argc, argv);
153     g_verbose = args.CheckCmdLineFlag("v");
154     args.GetCmdLineArgument("n", num_items);
155     args.GetCmdLineArgument("maxseg", max_segment);
156 
157     // Print usage
158     if (args.CheckCmdLineFlag("help"))
159     {
160         printf("%s "
161             "[--n=<input items> "
162             "[--device=<device-id>] "
163             "[--maxseg=<max segment length>] "
164             "[--v] "
165             "\n", argv[0]);
166         exit(0);
167     }
168 
169     // Initialize device
170     CubDebugExit(args.DeviceInit());
171 
172     // Allocate host arrays
173     int             *h_in        = new int[num_items];
174     int             *h_reference = new int[num_items];
175     unsigned char   *h_flags     = new unsigned char[num_items];
176 
177     // Initialize problem and solution
178     Initialize(h_in, h_flags, num_items, max_segment);
179     int num_selected = Solve(h_in, h_flags, h_reference, num_items);
180 
181     printf("cub::DevicePartition::Flagged %d items, %d selected (avg distance %d), %d-byte elements\n",
182         num_items, num_selected, (num_selected > 0) ? num_items / num_selected : 0, (int) sizeof(int));
183     fflush(stdout);
184 
185     // Allocate problem device arrays
186     int             *d_in = NULL;
187     unsigned char   *d_flags = NULL;
188 
189     CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items));
190     CubDebugExit(g_allocator.DeviceAllocate((void**)&d_flags, sizeof(unsigned char) * num_items));
191 
192     // Initialize device input
193     CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice));
194     CubDebugExit(cudaMemcpy(d_flags, h_flags, sizeof(unsigned char) * num_items, cudaMemcpyHostToDevice));
195 
196     // Allocate device output array and num selected
197     int     *d_out            = NULL;
198     int     *d_num_selected_out   = NULL;
199     CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * num_items));
200     CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int)));
201 
202     // Allocate temporary storage
203     void            *d_temp_storage = NULL;
204     size_t          temp_storage_bytes = 0;
205     CubDebugExit(DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items));
206     CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes));
207 
208     // Run
209     CubDebugExit(DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items));
210 
211     // Check for correctness (and display results, if specified)
212     int compare = CompareDeviceResults(h_reference, d_out, num_items, true, g_verbose);
213     printf("\t Data %s ", compare ? "FAIL" : "PASS");
214     compare |= CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose);
215     printf("\t Count %s ", compare ? "FAIL" : "PASS");
216     AssertEquals(0, compare);
217 
218     // Cleanup
219     if (h_in) delete[] h_in;
220     if (h_reference) delete[] h_reference;
221     if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out));
222     if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out));
223     if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage));
224     if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in));
225     if (d_flags) CubDebugExit(g_allocator.DeviceFree(d_flags));
226 
227     printf("\n\n");
228 
229     return 0;
230 }
231 
232 
233 
234