1 /******************************************************************************
2 * Copyright (c) 2011, Duane Merrill. All rights reserved.
3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the NVIDIA CORPORATION nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 ******************************************************************************/
28
29 /******************************************************************************
30 * Simple example of DeviceReduce::Sum().
31 *
32 * Sums an array of int keys.
33 *
34 * To compile using the command line:
35 * nvcc -arch=sm_XX example_device_reduce.cu -I../.. -lcudart -O3
36 *
37 ******************************************************************************/
38
39 // Ensure printing of CUDA runtime errors to console
40 #define CUB_STDERR
41
42 #include <stdio.h>
43
44 #include <cub/util_allocator.cuh>
45 #include <cub/device/device_reduce.cuh>
46
47 #include "../../test/test_util.h"
48
49 using namespace cub;
50
51
52 //---------------------------------------------------------------------
53 // Globals, constants and typedefs
54 //---------------------------------------------------------------------
55
56 bool g_verbose = false; // Whether to display input/output to console
57 CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
58
59
60 //---------------------------------------------------------------------
61 // Test generation
62 //---------------------------------------------------------------------
63
64 /**
65 * Initialize problem
66 */
Initialize(int * h_in,int num_items)67 void Initialize(
68 int *h_in,
69 int num_items)
70 {
71 for (int i = 0; i < num_items; ++i)
72 h_in[i] = i;
73
74 if (g_verbose)
75 {
76 printf("Input:\n");
77 DisplayResults(h_in, num_items);
78 printf("\n\n");
79 }
80 }
81
82
/**
 * Compute the host reference solution: the sum of the first num_items
 * elements of h_in.
 *
 * @param h_in         Host input array; the first num_items entries are read
 * @param h_reference  Output: receives the sum.  It is always written, and is
 *                     set to 0 when num_items is zero or negative so that the
 *                     caller never reads an indeterminate value.
 * @param num_items    Number of elements to accumulate
 */
void Solve(
    int *h_in,
    int &h_reference,
    int num_items)
{
    // Start from the additive identity so an empty input gives a defined result
    h_reference = 0;

    for (int i = 0; i < num_items; ++i)
        h_reference += h_in[i];
}
99
100
101 //---------------------------------------------------------------------
102 // Main
103 //---------------------------------------------------------------------
104
105 /**
106 * Main
107 */
main(int argc,char ** argv)108 int main(int argc, char** argv)
109 {
110 int num_items = 150;
111
112 // Initialize command line
113 CommandLineArgs args(argc, argv);
114 g_verbose = args.CheckCmdLineFlag("v");
115 args.GetCmdLineArgument("n", num_items);
116
117 // Print usage
118 if (args.CheckCmdLineFlag("help"))
119 {
120 printf("%s "
121 "[--n=<input items> "
122 "[--device=<device-id>] "
123 "[--v] "
124 "\n", argv[0]);
125 exit(0);
126 }
127
128 // Initialize device
129 CubDebugExit(args.DeviceInit());
130
131 printf("cub::DeviceReduce::Sum() %d items (%d-byte elements)\n",
132 num_items, (int) sizeof(int));
133 fflush(stdout);
134
135 // Allocate host arrays
136 int* h_in = new int[num_items];
137 int h_reference;
138
139 // Initialize problem and solution
140 Initialize(h_in, num_items);
141 Solve(h_in, h_reference, num_items);
142
143 // Allocate problem device arrays
144 int *d_in = NULL;
145 CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items));
146
147 // Initialize device input
148 CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice));
149
150 // Allocate device output array
151 int *d_out = NULL;
152 CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * 1));
153
154 // Request and allocate temporary storage
155 void *d_temp_storage = NULL;
156 size_t temp_storage_bytes = 0;
157 CubDebugExit(DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));
158 CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes));
159
160 // Run
161 CubDebugExit(DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));
162
163 // Check for correctness (and display results, if specified)
164 int compare = CompareDeviceResults(&h_reference, d_out, 1, g_verbose, g_verbose);
165 printf("\t%s", compare ? "FAIL" : "PASS");
166 AssertEquals(0, compare);
167
168 // Cleanup
169 if (h_in) delete[] h_in;
170 if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in));
171 if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out));
172 if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage));
173
174 printf("\n\n");
175
176 return 0;
177 }
178
179
180
181