1 /*
2 Copyright (c) 2020-2021, Intel Corporation
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 * Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 * Neither the name of Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
#ifdef _MSC_VER
// Keep the Windows headers from defining min()/max() macros, which would clash with std::min/std::max
#define NOMINMAX
// C4244: conversion with possible loss of data
#pragma warning(disable : 4244)
// C4305: truncation of a constant (e.g. double literal assigned to float)
#pragma warning(disable : 4305)
// preventing MSVC fopen() deprecation complaints
#define _CRT_SECURE_NO_DEPRECATE
#endif
40
41 #include <chrono>
42 #include <cmath>
43 #include <cstdio>
44 #include <cstdlib>
45 #include <iostream>
46 #include <limits>
47
48 #include "common_helpers.h"
49 #include "timing.h"
50
51 // ispcrt
52 #include "ispcrt.hpp"
53
54
// Largest absolute per-element difference tolerated between the ISPC and the serial output
#define CORRECTNESS_THRESHOLD 0.0002
// Image dimensions in pixels
#define WIDTH 768
#define HEIGHT 768
// Total number of pixels; parenthesized so the macro stays correct inside larger expressions
// (e.g. `x / SZ` or `x % SZ`)
#define SZ (WIDTH * HEIGHT)
// NOTE(review): not referenced in the visible part of this file; the value looks like
// 40 seconds expressed in milliseconds — confirm before relying on it
#define TIMEOUT (40 * 1000)
60
// Serial reference implementation. Only declared here; its definition is not in the visible
// part of this file. run() calls it to fill the "gold" buffer that the ISPC output is
// compared against.
extern void noise_serial(float x0, float y0, float x1, float y1, int width, int height, float output[]);

// NOTE(review): presumably brings the timing helpers used by run() (reset_and_start_timer,
// get_elapsed_mcycles) into scope — confirm against common_helpers.h / timing.h
using namespace hostutil;
64
// Argument block that run() fills in, uploads through ispcrt::Array<Parameters> and hands to
// the "noise_ispc" kernel launch.
// NOTE(review): the member order and types presumably have to match the kernel-side
// declaration — confirm before reordering or resizing fields.
// Every member carries a default initializer so that a default-constructed object never
// exposes indeterminate values.
struct Parameters {
    float x0{0.0f};         // first corner of the sampled region, x coordinate
    float y0{0.0f};         // first corner of the sampled region, y coordinate
    float x1{0.0f};         // opposite corner of the sampled region, x coordinate
    float y1{0.0f};         // opposite corner of the sampled region, y coordinate
    int width{0};           // image width in pixels
    int height{0};          // image height in pixels
    float *output{nullptr}; // destination buffer (run() stores a device pointer here)
};
74
run(int niter,int gx,int gy)75 static int run(int niter, int gx, int gy) {
76 std::cout.setf(std::ios::unitbuf);
77 const unsigned int height = HEIGHT;
78 const unsigned int width = WIDTH;
79
80 const float x0 = -10;
81 const float y0 = -10;
82 const float x1 = 10;
83 const float y1 = 10;
84
85 std::vector<float> buf(SZ);
86 std::vector<float> gold(SZ);
87
88 auto run_kernel = [&](ISPCRTDeviceType type) {
89 ispcrt::Device device(type);
90
91 // Setup output array
92 ispcrt::Array<float> buf_dev(device, buf);
93
94 // Setup parameters structure
95 Parameters p;
96
97 p.x0 = x0;
98 p.y0 = y0;
99 p.x1 = x1;
100 p.y1 = y1;
101 p.width = width;
102 p.height = height;
103 p.output = buf_dev.devicePtr();
104
105 auto p_dev = ispcrt::Array<Parameters>(device, p);
106
107 // Create module and kernel to execute
108 ispcrt::Module module(device, "genx_noise");
109 ispcrt::Kernel kernel(device, module, "noise_ispc");
110
111 // Create task queue and execute kernel
112 ispcrt::TaskQueue queue(device);
113 double minCyclesISPC = 1e30;
114 double kernelTicks = 1e30;
115 const char *device_str = (type == ISPCRT_DEVICE_TYPE_GPU) ? "GPU" : "CPU";
116 std::fill(buf.begin(), buf.end(), 0);
117 for (unsigned int i = 0; i < niter; i++) {
118 reset_and_start_timer();
119 queue.copyToDevice(p_dev);
120 queue.barrier();
121 auto res = queue.launch(kernel, p_dev, gx, gy);
122 queue.barrier();
123 queue.copyToHost(buf_dev);
124 queue.barrier();
125 queue.sync();
126 if (res.valid()) {
127 kernelTicks = res.time() * 1e-6;
128 }
129 double mcycles = get_elapsed_mcycles();
130 // Print resulting time
131 printf("@time of %s run:\t\t\t[%.3f] milliseconds\n", device_str, kernelTicks);
132 printf("@time of %s run:\t\t\t[%.3f] million cycles\n", device_str, mcycles);
133 minCyclesISPC = std::min(minCyclesISPC, mcycles);
134 }
135 printf("[noise ISPC %s]:\t\t[%.3f] million cycles (%d x %d image)\n", device_str, minCyclesISPC, width, height);
136 };
137
138 run_kernel(ISPCRT_DEVICE_TYPE_CPU);
139 run_kernel(ISPCRT_DEVICE_TYPE_GPU);
140
141 double minCyclesSerial = 1e30;
142 std::fill(gold.begin(), gold.end(), 0);
143 for (unsigned int i = 0; i < niter; i++) {
144 reset_and_start_timer();
145 auto wct = std::chrono::system_clock::now();
146 noise_serial(x0, y0, x1, y1, width, height, gold.data());
147 double mcycles = get_elapsed_mcycles();
148 auto dur = (std::chrono::system_clock::now() - wct);
149 auto secs = std::chrono::duration_cast<std::chrono::milliseconds>(dur);
150
151 // Print resulting time
152 printf("@time of serial run:\t\t\t[%ld] milliseconds\n", secs.count());
153 printf("@time of serial run:\t\t\t[%.3f] million cycles\n", mcycles);
154 minCyclesSerial = std::min(minCyclesSerial, mcycles);
155 }
156
157 printf("[noise serial]:\t\t[%.3f] million cycles (%d x %d image)\n", minCyclesSerial, width, height);
158
159 // Result check
160 bool pass = true;
161 double err = 0.0;
162 double max_err = 0.0;
163
164 int i = 0;
165 for (; i < width * height; i++) {
166 err = std::fabs(buf.at(i) - gold.at(i));
167 max_err = std::max(err, max_err);
168 if (err > CORRECTNESS_THRESHOLD) {
169 pass = false;
170 break;
171 }
172 }
173 if (!pass) {
174 std::cout << "Mismatch on " << i << "th value." << std::endl;
175 std::cout << "Was " << buf.at(i) << ", should be " << gold.at(i) << std::endl;
176 } else {
177 std::cout << "No issues found, max error:" << max_err << std::endl;
178 }
179
180 return (pass) ? 0 : 1;
181 }
182
// Writes the command-line synopsis to stderr and terminates the process with exit status 1.
static void usage() {
    std::fputs("usage: noise [niterations] [group threads width] [group threads height]\n", stderr);
    std::exit(1);
}
187
main(int argc,char * argv[])188 int main(int argc, char *argv[]) {
189 int niterations = 1;
190 int gx = 1, gy = 8;
191 if (argc == 4) {
192 niterations = atoi(argv[1]);
193 gx = atoi(argv[2]);
194 gy = atoi(argv[3]);
195 }
196 if (niterations < 1 || gx < 1 || gy < 1) {
197 usage();
198 }
199 int success = 0;
200
201 std::cout << "Running test with " << niterations << " iterations of ISPC on GEN and CPU using " << gx << " * " << gy
202 << " threads." << std::endl;
203 success = run(niterations, gx, gy);
204
205 return success;
206 }
207