1 /*
2 * Copyright (C) 2009-2016 Dr. Christoph L. Spiel
3 *
4 * This file is part of Enblend.
5 *
6 * Enblend is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * Enblend is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Enblend; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21
22 // No test, no bug. -- Chris Spiel
23
24
25 #ifdef HAVE_CONFIG_H
26 #include <config.h>
27 #endif
28
29 #include <getopt.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include <cassert>
34 #include <cmath> // fabsf
35 #include <iostream>
36 #include <string>
37
38 #include "self_test.h"
39
40
// Number of elements of the array `m_array'.  The argument must be a
// true array; for a pointer the quotient is meaningless.  The
// argument is parenthesized in the element expression so that
// compound array expressions, e.g. a conditional expression, are
// indexed as a whole.
#define lengthof(m_array) (sizeof(m_array) / sizeof((m_array)[0]))
42
43
44 extern const std::string command;
45
46
47 ////////////////////////////////////////////////////////////////////////
48
49
// Number of arguments we pass to getopt_long(3) in each of our tests.
// It is the length of test_case::arguments; the leading program name
// of each argument vector counts as one of these arguments.
#define ARG_COUNT 4
52
53
// Indices into the `flags' arrays: one slot for each of the short
// options -a, -b, and -c, followed by one slot for each of the long
// options --long-a, --long-b, and --long-c.  FLAG_COUNT is the
// number of slots.
enum {a_short, b_short, c_short, a_long, b_long, c_long, FLAG_COUNT};
55
56
// One entry of the table of getopt_long(3) tests.
struct test_case {
    // Argument vector handed to getopt_long(3); the first element
    // plays the role of the program name.
    const char* arguments[ARG_COUNT];
    // Values we expect the parser to record, indexed by the flag
    // enumerators a_short ... c_long.
    int flags[FLAG_COUNT];
};
61
62
// Convert the decimal number at the beginning of the string `s' to
// an int.  Characters that strtol(3) does not consume are ignored.
inline static int
int_of_string(const char* s)
{
    const long parsed_value = strtol(s, nullptr, 10);

    return static_cast<int>(parsed_value);
}
68
69
// Put the global variables of getopt(3) and getopt_long(3) back to
// the values from which a new scan of an argument vector can start.
static void
reset_getopt_globals()
{
    optarg = nullptr;           // no option argument is pending
    optind = 1;                 // the next scan starts at the first argument
    optopt = -1;                // forget the last "unknown option" character
    opterr = 0;                 // keep getopt_long(3) from printing messages
}
78
79
80 static int
try_out_getopt_long(int arg_count,const char * arguments[],int * flags)81 try_out_getopt_long(int arg_count, const char* arguments[], int* flags)
82 {
83 const char* short_options = "ab:c::";
84 const struct option long_options[] = {
85 {"long-a", no_argument, nullptr, 1},
86 {"long-b", required_argument, nullptr, 2},
87 {"long-c", optional_argument, nullptr, 3},
88 {nullptr, 0, nullptr, 0}
89 };
90
91 reset_getopt_globals();
92 while (true)
93 {
94 int option_index = 0;
95 int code = getopt_long(arg_count, const_cast<char* const*>(arguments),
96 short_options, long_options,
97 &option_index);
98
99 if (code == -1)
100 {
101 break;
102 }
103
104 switch (code)
105 {
106 case 1:
107 flags[a_long] = 1;
108 break;
109 case 2:
110 flags[b_long] = int_of_string(optarg);
111 break;
112 case 3:
113 flags[c_long] = optarg == nullptr ? 1 : int_of_string(optarg);
114 break;
115 case 'a':
116 flags[a_short] = 1;
117 break;
118 case 'b':
119 flags[b_short] = int_of_string(optarg);
120 break;
121 case 'c':
122 flags[c_short] = optarg == nullptr ? 1 : int_of_string(optarg);
123 break;
124 default:
125 return -1;
126 }
127 }
128
129 return optind;
130 }
131
132
// Write the first `size' elements of `list' to stream `out' and put
// a single space between neighboring elements.  Nothing is written
// for an empty list.
template <typename T>
static void
write_list(std::ostream& out, unsigned size, const T list)
{
    for (unsigned index = 0U; index < size; ++index)
    {
        if (index != 0U)
        {
            out << ' ';
        }
        out << list[index];
    }
}
147
148
// Name of the first argument, i.e. the first non-option in the list
// of arguments.  We need to know its name so that we can check
// whether getopt_long(3) really parsed all options: after a scan
// the argument at the returned index must be equal to ARG1.
#define ARG1 "1"
153
154
155 // Test whether the library function getopt_long(3) works as required.
156 bool
getopt_long_works_ok()157 getopt_long_works_ok()
158 {
159 bool has_passed_test = true;
160 struct test_case tests[] = {
161 {{"p", ARG1, "2", "3"}, {0, 0, 0, 0, 0, 0}},
162
163 {{"p", "-a", ARG1, "2"}, {1, 0, 0, 0, 0, 0}},
164 {{"p", "-b2", ARG1, "2"}, {0, 2, 0, 0, 0, 0}},
165 {{"p", "-c", ARG1, "2"}, {0, 0, 1, 0, 0, 0}},
166 {{"p", "-c2", ARG1, "2"}, {0, 0, 2, 0, 0, 0}},
167
168 {{"p", "--long-a", ARG1, "2"}, {0, 0, 0, 1, 0, 0}},
169 {{"p", "--long-b=2", ARG1, "2"}, {0, 0, 0, 0, 2, 0}},
170 {{"p", "--long-c", ARG1, "2"}, {0, 0, 0, 0, 0, 1}},
171 {{"p", "--long-c=2", ARG1, "2"}, {0, 0, 0, 0, 0, 2}},
172
173 {{"p", "-a", "-b2", ARG1}, {1, 2, 0, 0, 0, 0}},
174 {{"p", "-a", "-b2", ARG1}, {1, 2, 0, 0, 0, 0}},
175 {{"p", "-ab2", "-c", ARG1}, {1, 2, 1, 0, 0, 0}},
176 {{"p", "-ab2", "-c3", ARG1}, {1, 2, 3, 0, 0, 0}},
177
178 {{"p", "--long-a", "--long-b=2", ARG1}, {0, 0, 0, 1, 2, 0}},
179 {{"p", "--long-a", "--long-b=2", ARG1}, {0, 0, 0, 1, 2, 0}},
180 {{"p", "--long-b=2", "--long-c", ARG1}, {0, 0, 0, 0, 2, 1}},
181 {{"p", "--long-b=2", "--long-c=3", ARG1}, {0, 0, 0, 0, 2, 3}},
182
183 {{"p", "-a", "--long-a", ARG1}, {1, 0, 0, 1, 0, 0}},
184 {{"p", "-b2", "--long-a", ARG1}, {0, 2, 0, 1, 0, 0}},
185 {{"p", "-a", "--long-b=2", ARG1}, {1, 0, 0, 0, 2, 0}},
186 {{"p", "-b2", "--long-b=2", ARG1}, {0, 2, 0, 0, 2, 0}},
187
188 {{nullptr, nullptr, nullptr}, {0, 0, 0, 0, 0, 0}}
189 };
190 const unsigned arg_count = lengthof(tests->arguments);
191 const unsigned flag_count = lengthof(tests->flags);
192
193 for (struct test_case* t = tests; t->arguments[0] != nullptr; ++t)
194 {
195 int flags[] = {0, 0, 0, 0, 0, 0};
196 assert(lengthof(tests->flags) == lengthof(flags));
197 const int index = try_out_getopt_long(arg_count, t->arguments, flags);
198
199 if (index < 0 || index >= static_cast<int>(arg_count) ||
200 strcmp(t->arguments[index], ARG1) != 0)
201 {
202 std::cerr <<
203 command <<
204 ": failed self test: getopt_long(3) did not parse argument list \"";
205 write_list(std::cerr, arg_count, t->arguments);
206 std::cerr << "\"\n";
207
208 has_passed_test = false;
209 }
210
211 for (unsigned i = 0U; i != flag_count; ++i)
212 {
213 if (flags[i] != t->flags[i])
214 {
215 std::cerr <<
216 command <<
217 ": failed self test: getopt_long(3) incorrectly parses argument list \"";
218 write_list(std::cerr, arg_count, t->arguments);
219 std::cerr << "\";\n";
220
221 std::cerr <<
222 command <<
223 ": failed self test: expected {";
224 write_list(std::cerr, flag_count, t->flags);
225 std::cerr << "}, but got {";
226 write_list(std::cerr, flag_count, flags);
227 std::cerr << "}\n";
228
229 has_passed_test = false;
230 }
231 }
232 }
233
234 reset_getopt_globals();
235
236 return has_passed_test;
237 }
238
239
240 // Run a kernel, if we have OpenCL support.
241 #ifdef OPENCL
242
// Vector of single-precision numbers; the operand type of all axpy
// functions below.
using float_vector = std::vector<float>;
244
245
// OpenCL source code of the kernel `axpy'.  The work item with
// global index i stores alpha * x[i] + y[i] in z[i]; work items
// whose index is n or larger return without touching z.
static const std::string
axpy_source("kernel void\n"
            "axpy(const float alpha,\n"
            " global const float *restrict x,\n"
            " global const float *restrict y,\n"
            " const int n,\n"
            " global float *restrict z)\n"
            "{\n"
            " const int i = get_global_id(0);\n"
            "\n"
            " if (i >= n)\n"
            " {\n"
            " return;\n"
            " }\n"
            "\n"
            " z[i] = alpha * x[i] + y[i];\n"
            "}\n");
263
264
265 class Alpha_times_x_plus_y : public ocl::BuildableFunction
266 {
267 public:
268 Alpha_times_x_plus_y() = delete;
Alpha_times_x_plus_y(const cl::Context & a_context)269 explicit Alpha_times_x_plus_y(const cl::Context& a_context) : f_(a_context, axpy_source) {}
270
build(const std::string & a_build_option)271 void build(const std::string& a_build_option)
272 {
273 std::cerr << "\n+ Alpha_times_x_plus_y::build: by request\n\n";
274 f_.build(a_build_option);
275 std::cerr <<
276 "+ Alpha_times_x_plus_y::build: log begin ================\n" <<
277 f_.build_log() <<
278 "\n+ Alpha_times_x_plus_y::build: log end ================\n";
279 }
280
wait()281 void wait()
282 {
283 f_.wait();
284 std::cerr << "\n+ Alpha_times_x_plus_y::wait: carry on...\n\n";
285 initialize();
286 }
287
run(float alpha,const float_vector & x,const float_vector & y,float_vector & z)288 void run(float alpha, const float_vector& x, const float_vector& y, float_vector& z)
289 {
290 const size_t n = x.size();
291 assert(n == y.size());
292 assert(n <= z.size());
293
294 const size_t buffer_size = n * sizeof(float);
295
296 cl::Buffer x_buffer(f_.context(), CL_MEM_READ_ONLY, buffer_size);
297 cl::Buffer y_buffer(f_.context(), CL_MEM_READ_ONLY, buffer_size);
298 cl::Buffer z_buffer(f_.context(), CL_MEM_WRITE_ONLY, buffer_size);
299
300 kernel_.setArg(0U, static_cast<cl_float>(alpha));
301 kernel_.setArg(1U, x_buffer);
302 kernel_.setArg(2U, y_buffer);
303 kernel_.setArg(3U, static_cast<cl_int>(n));
304 kernel_.setArg(4U, z_buffer);
305
306 f_.queue().enqueueWriteBuffer(x_buffer, CL_TRUE, 0U, buffer_size, &x[0]);
307 f_.queue().enqueueWriteBuffer(y_buffer, CL_TRUE, 0U, buffer_size, &y[0]);
308 f_.queue().enqueueNDRangeKernel(kernel_, cl::NullRange, cl::NDRange(n), cl::NullRange);
309 f_.queue().enqueueReadBuffer(z_buffer, CL_TRUE, 0U, buffer_size, &z[0]);
310 }
311
312 private:
initialize()313 void initialize()
314 {
315 kernel_ = f_.create_kernel("axpy");
316 }
317
318 ocl::LazyFunctionCXXOfString f_;
319 cl::Kernel kernel_;
320 }; // Alpha_times_x_plus_y
321
322
323 static void
alpha_times_x_plus_y(float alpha,const float_vector & x,const float_vector & y,float_vector & z)324 alpha_times_x_plus_y(float alpha, const float_vector& x, const float_vector& y, float_vector& z)
325 {
326 const size_t n = x.size();
327
328 assert(n == y.size());
329 assert(n <= z.size());
330
331 for (size_t i = 0U; i != n; ++i)
332 {
333 z[i] = alpha * x[i] + y[i];
334 }
335 }
336
337
// Fill the range [first, last) with a_value, a_value + 1, ..., i.e.
// with values that increase by one from element to element.
template <class forward_iterator, class t>
inline static void
iota(forward_iterator first, forward_iterator last, t a_value)
{
    for (; first != last; ++first, ++a_value)
    {
        *first = a_value;
    }
}
347
348
349 static bool
test_axpy_on_gpu(cl::Context * a_context)350 test_axpy_on_gpu(cl::Context* a_context)
351 {
352 const float alpha = 2.5F;
353 const size_t n = 20U;
354
355 float_vector x(n);
356 float_vector y(n);
357 float_vector z0(n);
358 float_vector z(n);
359
360 iota(x.begin(), x.end(), 1.0F);
361 iota(y.begin(), y.end(), 1.0F);
362
363 alpha_times_x_plus_y(alpha, x, y, z0);
364
365 try
366 {
367 Alpha_times_x_plus_y axpy(*a_context);
368
369 axpy.build("-Werror");
370 axpy.wait();
371 axpy.run(alpha, x, y, z);
372 }
373 catch (cl::Error& a_cl_error)
374 {
375 std::cerr <<
376 command << ": warning: plain cl error: " << a_cl_error.what() << "\n" <<
377 command << ": note: " << ocl::string_of_error_code(a_cl_error.err()) <<
378 std::endl;
379 return false;
380 }
381 catch (ocl::runtime_error& an_opencl_runtime_error)
382 {
383 std::cerr <<
384 command << ": warning: ocl error: " << an_opencl_runtime_error.what() << "\n" <<
385 command << ": note: " <<
386 ocl::string_of_error_code(an_opencl_runtime_error.error().err()) << "\n" <<
387 command << ": note: " << an_opencl_runtime_error.additional_message() <<
388 std::endl;
389 return false;
390 }
391 catch (...)
392 {
393 std::cerr <<
394 command << ": warning: unknown exception thrown during self test \"test_axpy_on_gpu\"" <<
395 std::endl;
396 return false;
397 }
398
399 for (size_t i = 0U; i != n; ++i)
400 {
401 #ifdef DEBUG
402 std::cout <<
403 "+ test_axpy_on_gpu: [" << i << "] " <<
404 alpha << " * " << x[i] << " + " << y[i] << " = " << z[i] << " (reference: " << z0[i] << "), " <<
405 "delta: " << std::scientific << fabsf(z[i] - z0[i]) << std::fixed << "\n";
406 #endif // DEBUG
407 if (fabsf(z[i] - z0[i]) > std::numeric_limits<float>::epsilon())
408 {
409 std::cerr <<
410 "+ test_axpy_on_gpu: failure at index " << i <<
411 ", expected " << z0[i] << ", but got " << z[i] << "\n";
412 return false;
413 }
414 }
415
416 return true;
417 }
418
419
420 bool
gpu_is_ok(cl::Context * a_context)421 gpu_is_ok(cl::Context* a_context)
422 {
423 return test_axpy_on_gpu(a_context);
424 }
425
426 #endif // OPENCL
427