1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill.  All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the NVIDIA CORPORATION nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
29 /******************************************************************************
30  * Simple example of DeviceReduce::Sum().
31  *
32  * Sums an array of int keys.
33  *
34  * To compile using the command line:
35  *   nvcc -arch=sm_XX example_device_reduce.cu -I../.. -lcudart -O3
36  *
37  ******************************************************************************/
38 
39 // Ensure printing of CUDA runtime errors to console
40 #define CUB_STDERR
41 
42 #include <stdio.h>
43 
44 #include <cub/util_allocator.cuh>
45 #include <cub/device/device_reduce.cuh>
46 
47 #include "../../test/test_util.h"
48 
49 using namespace cub;
50 
51 
52 //---------------------------------------------------------------------
53 // Globals, constants and typedefs
54 //---------------------------------------------------------------------
55 
// When true, the input and the results are echoed to the console
// (set from the --v command-line flag in main)
bool g_verbose = false;

// Allocator through which all device buffers in this example are obtained and released.
// NOTE(review): the `true` argument is presumably the allocator's skip-cleanup flag --
// confirm against cub/util_allocator.cuh
cub::CachingDeviceAllocator g_allocator(true);
58 
59 
60 //---------------------------------------------------------------------
61 // Test generation
62 //---------------------------------------------------------------------
63 
/**
 * Generate the input problem on the host.
 *
 * Fills h_in with the ascending sequence 0, 1, ..., num_items - 1 and, when
 * g_verbose is set, prints the generated values to stdout.
 *
 * @param h_in       Host array to fill; must hold at least num_items ints
 * @param num_items  Number of elements to generate
 */
void Initialize(
    int   *h_in,
    int     num_items)
{
    // Each element simply stores its own index
    int idx = 0;
    while (idx < num_items)
    {
        h_in[idx] = idx;
        ++idx;
    }

    // Nothing else to do unless verbose output was requested
    if (!g_verbose)
        return;

    printf("Input:\n");
    DisplayResults(h_in, num_items);
    printf("\n\n");
}
81 
82 
/**
 * Compute the host-side reference solution: the sum of all input items.
 *
 * @param h_in         Host array holding num_items input values
 * @param h_reference  Receives the sum. Always written: it is set to 0 when
 *                     num_items is zero (or negative), so the caller never
 *                     reads an indeterminate value.
 * @param num_items    Number of elements of h_in to accumulate
 */
void Solve(
    int           *h_in,
    int           &h_reference,
    int             num_items)
{
    // Accumulate in unsigned arithmetic: signed int overflow is undefined
    // behavior, whereas unsigned addition wraps modulo 2^32. For the
    // 0..n-1 input used by this example the sum exceeds INT_MAX once
    // n grows past roughly 65536 items.
    unsigned int total = 0;

    for (int i = 0; i < num_items; ++i)
        total += static_cast<unsigned int>(h_in[i]);

    // Convert back to the signed result type (two's-complement wraparound)
    h_reference = static_cast<int>(total);
}
99 
100 
101 //---------------------------------------------------------------------
102 // Main
103 //---------------------------------------------------------------------
104 
/**
 * Main
 *
 * Builds an integer input sequence on the host, sums it on the device with
 * cub::DeviceReduce::Sum(), and checks the device result against a host
 * reference sum.
 *
 * Command-line options:
 *   --n=<input items>      Number of items to reduce (default 150, must be >= 0)
 *   --device=<device-id>   Listed in the usage text; not parsed directly in this function
 *   --v                    Display input and output on the console
 *   --help                 Print usage and exit
 *
 * @return 0 on success, 1 if --n is negative. CUDA/CUB errors are routed
 *         through CubDebugExit().
 */
int main(int argc, char** argv)
{
    int num_items = 150;

    // Initialize command line
    CommandLineArgs args(argc, argv);
    g_verbose = args.CheckCmdLineFlag("v");
    args.GetCmdLineArgument("n", num_items);

    // Print usage
    if (args.CheckCmdLineFlag("help"))
    {
        printf("%s "
            "[--n=<input items>] "
            "[--device=<device-id>] "
            "[--v] "
            "\n", argv[0]);
        return 0;
    }

    // Reject a negative item count before it reaches new[] or the allocator
    if (num_items < 0)
    {
        fprintf(stderr, "Invalid --n=%d: number of input items must be non-negative\n", num_items);
        return 1;
    }

    // Initialize device
    CubDebugExit(args.DeviceInit());

    printf("cub::DeviceReduce::Sum() %d items (%d-byte elements)\n",
        num_items, (int) sizeof(int));
    fflush(stdout);

    // Size of the input in bytes, computed once in size_t
    const size_t input_bytes = sizeof(int) * static_cast<size_t>(num_items);

    // Allocate host arrays
    int* h_in = new int[num_items];
    int  h_reference = 0;   // sum of an empty input

    // Initialize problem and solution
    Initialize(h_in, num_items);
    Solve(h_in, h_reference, num_items);

    // Allocate problem device arrays
    int *d_in = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, input_bytes));

    // Initialize device input
    CubDebugExit(cudaMemcpy(d_in, h_in, input_bytes, cudaMemcpyHostToDevice));

    // Allocate device output array (a single int receives the sum)
    int *d_out = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * 1));

    // Request and allocate temporary storage: the first call is made with a
    // NULL d_temp_storage and is used here only to obtain temp_storage_bytes
    void            *d_temp_storage = NULL;
    size_t          temp_storage_bytes = 0;
    CubDebugExit(DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));
    CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes));

    // Run
    CubDebugExit(DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));

    // Surface any asynchronous execution error here rather than at a later, unrelated call
    CubDebugExit(cudaDeviceSynchronize());

    // Check for correctness (and display results, if specified)
    int compare = CompareDeviceResults(&h_reference, d_out, 1, g_verbose, g_verbose);
    printf("\t%s", compare ? "FAIL" : "PASS");
    AssertEquals(0, compare);

    // Cleanup (delete[] on a null pointer is a no-op, so no guard is needed)
    delete[] h_in;
    if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in));
    if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out));
    if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage));

    printf("\n\n");

    return 0;
}
178 
179 
180 
181