1 /*
2  * Copyright (C) 2009-2016 Dr. Christoph L. Spiel
3  *
4  * This file is part of Enblend.
5  *
6  * Enblend is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * Enblend is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Enblend; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20 
21 
22 // No test, no bug.  -- Chris Spiel
23 
24 
25 #ifdef HAVE_CONFIG_H
26 #include <config.h>
27 #endif
28 
29 #include <getopt.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include <cassert>
34 #include <cmath>                // fabsf
35 #include <iostream>
36 #include <string>
37 
38 #include "self_test.h"
39 
40 
41 #define lengthof(m_array) (sizeof(m_array) / sizeof(m_array[0]))
42 
43 
44 extern const std::string command;
45 
46 
47 ////////////////////////////////////////////////////////////////////////
48 
49 
50 // Number of arguments we pass to getopt_long in each of our tests.
51 #define ARG_COUNT 4
52 
53 
54 enum {a_short, b_short, c_short, a_long, b_long, c_long, FLAG_COUNT};
55 
56 
57 struct test_case {
58     const char* arguments[ARG_COUNT];
59     int flags[FLAG_COUNT];
60 };
61 
62 
// Answer the decimal integer at the beginning of string `s' as
// converted by strtol(3), narrowed to `int'.
inline static int
int_of_string(const char* s)
{
    const long parsed_value = strtol(s, nullptr, 10);

    return static_cast<int>(parsed_value);
}
68 
69 
// Put the global variables of getopt(3) back to the values we want
// at the start of a new scan of an argument vector.
static void
reset_getopt_globals()
{
    // Restart the scan at the first argument after the program name
    // and forget the argument of the last option.
    optind = 1;
    optarg = nullptr;

    // Forget the last "unknown option" character.
    optopt = -1;

    // Keep getopt_long(3) from printing its own diagnostics.
    opterr = 0;
}
78 
79 
80 static int
try_out_getopt_long(int arg_count,const char * arguments[],int * flags)81 try_out_getopt_long(int arg_count, const char* arguments[], int* flags)
82 {
83     const char* short_options = "ab:c::";
84     const struct option long_options[] = {
85         {"long-a", no_argument,       nullptr, 1},
86         {"long-b", required_argument, nullptr, 2},
87         {"long-c", optional_argument, nullptr, 3},
88         {nullptr, 0, nullptr, 0}
89     };
90 
91     reset_getopt_globals();
92     while (true)
93     {
94         int option_index = 0;
95         int code = getopt_long(arg_count, const_cast<char* const*>(arguments),
96                                short_options, long_options,
97                                &option_index);
98 
99         if (code == -1)
100         {
101             break;
102         }
103 
104         switch (code)
105         {
106         case 1:
107             flags[a_long] = 1;
108             break;
109         case 2:
110             flags[b_long] = int_of_string(optarg);
111             break;
112         case 3:
113             flags[c_long] = optarg == nullptr ? 1 : int_of_string(optarg);
114             break;
115         case 'a':
116             flags[a_short] = 1;
117             break;
118         case 'b':
119             flags[b_short] = int_of_string(optarg);
120             break;
121         case 'c':
122             flags[c_short] = optarg == nullptr ? 1 : int_of_string(optarg);
123             break;
124         default:
125             return -1;
126         }
127     }
128 
129     return optind;
130 }
131 
132 
// Write the first `size' elements of `list' to stream `out'.  Adjacent
// elements are separated by a single space; nothing is written in
// front of the first or after the last element.
template <typename T>
static void
write_list(std::ostream& out, unsigned size, const T list)
{
    if (size == 0U)
    {
        return;
    }

    out << list[0U];
    for (unsigned position = 1U; position < size; ++position)
    {
        out << ' ';
        out << list[position];
    }
}
147 
148 
149 // Name of the first argument, i.e. the first non-option in the list
150 // of arguments.  We need to know its name so that we can check
151 // whether getopt_long(3) really parsed all options.
152 #define ARG1 "1"
153 
154 
155 // Test whether the library function getopt_long(3) works as required.
156 bool
getopt_long_works_ok()157 getopt_long_works_ok()
158 {
159     bool has_passed_test = true;
160     struct test_case tests[] = {
161         {{"p", ARG1, "2", "3"},                    {0, 0, 0, 0, 0, 0}},
162 
163         {{"p", "-a", ARG1, "2"},                   {1, 0, 0, 0, 0, 0}},
164         {{"p", "-b2", ARG1, "2"},                  {0, 2, 0, 0, 0, 0}},
165         {{"p", "-c", ARG1, "2"},                   {0, 0, 1, 0, 0, 0}},
166         {{"p", "-c2", ARG1, "2"},                  {0, 0, 2, 0, 0, 0}},
167 
168         {{"p", "--long-a", ARG1, "2"},             {0, 0, 0, 1, 0, 0}},
169         {{"p", "--long-b=2", ARG1, "2"},           {0, 0, 0, 0, 2, 0}},
170         {{"p", "--long-c", ARG1, "2"},             {0, 0, 0, 0, 0, 1}},
171         {{"p", "--long-c=2", ARG1, "2"},           {0, 0, 0, 0, 0, 2}},
172 
173         {{"p", "-a", "-b2", ARG1},                 {1, 2, 0, 0, 0, 0}},
174         {{"p", "-a", "-b2", ARG1},                 {1, 2, 0, 0, 0, 0}},
175         {{"p", "-ab2", "-c", ARG1},                {1, 2, 1, 0, 0, 0}},
176         {{"p", "-ab2", "-c3", ARG1},               {1, 2, 3, 0, 0, 0}},
177 
178         {{"p", "--long-a", "--long-b=2", ARG1},    {0, 0, 0, 1, 2, 0}},
179         {{"p", "--long-a", "--long-b=2", ARG1},    {0, 0, 0, 1, 2, 0}},
180         {{"p", "--long-b=2", "--long-c", ARG1},    {0, 0, 0, 0, 2, 1}},
181         {{"p", "--long-b=2", "--long-c=3", ARG1},  {0, 0, 0, 0, 2, 3}},
182 
183         {{"p", "-a", "--long-a", ARG1},            {1, 0, 0, 1, 0, 0}},
184         {{"p", "-b2", "--long-a", ARG1},           {0, 2, 0, 1, 0, 0}},
185         {{"p", "-a", "--long-b=2", ARG1},          {1, 0, 0, 0, 2, 0}},
186         {{"p", "-b2", "--long-b=2", ARG1},         {0, 2, 0, 0, 2, 0}},
187 
188         {{nullptr, nullptr, nullptr}, {0, 0, 0, 0, 0, 0}}
189     };
190     const unsigned arg_count = lengthof(tests->arguments);
191     const unsigned flag_count = lengthof(tests->flags);
192 
193     for (struct test_case* t = tests; t->arguments[0] != nullptr; ++t)
194     {
195         int flags[] = {0, 0, 0, 0, 0, 0};
196         assert(lengthof(tests->flags) == lengthof(flags));
197         const int index = try_out_getopt_long(arg_count, t->arguments, flags);
198 
199         if (index < 0 || index >= static_cast<int>(arg_count) ||
200             strcmp(t->arguments[index], ARG1) != 0)
201         {
202             std::cerr <<
203                 command <<
204                 ": failed self test: getopt_long(3) did not parse argument list \"";
205             write_list(std::cerr, arg_count, t->arguments);
206             std::cerr << "\"\n";
207 
208             has_passed_test = false;
209         }
210 
211         for (unsigned i = 0U; i != flag_count; ++i)
212         {
213             if (flags[i] != t->flags[i])
214             {
215                 std::cerr <<
216                     command <<
217                     ": failed self test: getopt_long(3) incorrectly parses argument list \"";
218                 write_list(std::cerr, arg_count, t->arguments);
219                 std::cerr << "\";\n";
220 
221                 std::cerr <<
222                     command <<
223                     ": failed self test: expected {";
224                 write_list(std::cerr, flag_count, t->flags);
225                 std::cerr << "}, but got {";
226                 write_list(std::cerr, flag_count, flags);
227                 std::cerr << "}\n";
228 
229                 has_passed_test = false;
230             }
231         }
232     }
233 
234     reset_getopt_globals();
235 
236     return has_passed_test;
237 }
238 
239 
240 // Run a kernel, if we have OpenCL support.
241 #ifdef OPENCL
242 
243 typedef std::vector<float> float_vector;
244 
245 
// OpenCL C source text of the kernel "axpy", which stores
// alpha * x[i] + y[i] in z[i] for every work item i below n.
static const std::string axpy_source =
    "kernel void\n"
    "axpy(const float alpha,\n"
    "     global const float *restrict x,\n"
    "     global const float *restrict y,\n"
    "     const int n,\n"
    "     global float *restrict z)\n"
    "{\n"
    "    const int i = get_global_id(0);\n"
    "\n"
    "    if (i >= n)\n"
    "    {\n"
    "        return;\n"
    "    }\n"
    "\n"
    "    z[i] = alpha * x[i] + y[i];\n"
    "}\n";
263 
264 
265 class Alpha_times_x_plus_y : public ocl::BuildableFunction
266 {
267 public:
268     Alpha_times_x_plus_y() = delete;
Alpha_times_x_plus_y(const cl::Context & a_context)269     explicit Alpha_times_x_plus_y(const cl::Context& a_context) : f_(a_context, axpy_source) {}
270 
build(const std::string & a_build_option)271     void build(const std::string& a_build_option)
272     {
273         std::cerr << "\n+ Alpha_times_x_plus_y::build: by request\n\n";
274         f_.build(a_build_option);
275         std::cerr <<
276             "+ Alpha_times_x_plus_y::build: log begin ================\n" <<
277             f_.build_log() <<
278             "\n+ Alpha_times_x_plus_y::build: log end   ================\n";
279     }
280 
wait()281     void wait()
282     {
283         f_.wait();
284         std::cerr << "\n+ Alpha_times_x_plus_y::wait: carry on...\n\n";
285         initialize();
286     }
287 
run(float alpha,const float_vector & x,const float_vector & y,float_vector & z)288     void run(float alpha, const float_vector& x, const float_vector& y, float_vector& z)
289     {
290         const size_t n = x.size();
291         assert(n == y.size());
292         assert(n <= z.size());
293 
294         const size_t buffer_size = n * sizeof(float);
295 
296         cl::Buffer x_buffer(f_.context(), CL_MEM_READ_ONLY, buffer_size);
297         cl::Buffer y_buffer(f_.context(), CL_MEM_READ_ONLY, buffer_size);
298         cl::Buffer z_buffer(f_.context(), CL_MEM_WRITE_ONLY, buffer_size);
299 
300         kernel_.setArg(0U, static_cast<cl_float>(alpha));
301         kernel_.setArg(1U, x_buffer);
302         kernel_.setArg(2U, y_buffer);
303         kernel_.setArg(3U, static_cast<cl_int>(n));
304         kernel_.setArg(4U, z_buffer);
305 
306         f_.queue().enqueueWriteBuffer(x_buffer, CL_TRUE, 0U, buffer_size, &x[0]);
307         f_.queue().enqueueWriteBuffer(y_buffer, CL_TRUE, 0U, buffer_size, &y[0]);
308         f_.queue().enqueueNDRangeKernel(kernel_, cl::NullRange, cl::NDRange(n), cl::NullRange);
309         f_.queue().enqueueReadBuffer(z_buffer, CL_TRUE, 0U, buffer_size, &z[0]);
310     }
311 
312 private:
initialize()313     void initialize()
314     {
315         kernel_ = f_.create_kernel("axpy");
316     }
317 
318     ocl::LazyFunctionCXXOfString f_;
319     cl::Kernel kernel_;
320 }; // Alpha_times_x_plus_y
321 
322 
323 static void
alpha_times_x_plus_y(float alpha,const float_vector & x,const float_vector & y,float_vector & z)324 alpha_times_x_plus_y(float alpha, const float_vector& x, const float_vector& y, float_vector& z)
325 {
326     const size_t n = x.size();
327 
328     assert(n == y.size());
329     assert(n <= z.size());
330 
331     for (size_t i = 0U; i != n; ++i)
332     {
333         z[i] = alpha * x[i] + y[i];
334     }
335 }
336 
337 
// Fill the range [first, last) with successively incremented values
// that start at `a_value'.
template <class forward_iterator, class t>
inline static void
iota(forward_iterator first, forward_iterator last, t a_value)
{
    for (; first != last; ++first, ++a_value)
    {
        *first = a_value;
    }
}
347 
348 
349 static bool
test_axpy_on_gpu(cl::Context * a_context)350 test_axpy_on_gpu(cl::Context* a_context)
351 {
352     const float alpha = 2.5F;
353     const size_t n = 20U;
354 
355     float_vector x(n);
356     float_vector y(n);
357     float_vector z0(n);
358     float_vector z(n);
359 
360     iota(x.begin(), x.end(), 1.0F);
361     iota(y.begin(), y.end(), 1.0F);
362 
363     alpha_times_x_plus_y(alpha, x, y, z0);
364 
365     try
366     {
367         Alpha_times_x_plus_y axpy(*a_context);
368 
369         axpy.build("-Werror");
370         axpy.wait();
371         axpy.run(alpha, x, y, z);
372     }
373     catch (cl::Error& a_cl_error)
374     {
375         std::cerr <<
376             command << ": warning: plain cl error: " << a_cl_error.what() << "\n" <<
377             command << ": note: " << ocl::string_of_error_code(a_cl_error.err()) <<
378             std::endl;
379         return false;
380     }
381     catch (ocl::runtime_error& an_opencl_runtime_error)
382     {
383         std::cerr <<
384             command << ": warning: ocl error: " << an_opencl_runtime_error.what() << "\n" <<
385             command << ": note: " <<
386             ocl::string_of_error_code(an_opencl_runtime_error.error().err()) << "\n" <<
387             command << ": note: " << an_opencl_runtime_error.additional_message() <<
388             std::endl;
389         return false;
390     }
391     catch (...)
392     {
393         std::cerr <<
394             command << ": warning: unknown exception thrown during self test \"test_axpy_on_gpu\"" <<
395             std::endl;
396         return false;
397     }
398 
399     for (size_t i = 0U; i != n; ++i)
400     {
401 #ifdef DEBUG
402         std::cout <<
403             "+ test_axpy_on_gpu: [" << i << "]  " <<
404             alpha << " * " << x[i] << " + " << y[i] << " = " << z[i] << " (reference: " << z0[i] << "), " <<
405             "delta: " << std::scientific << fabsf(z[i] - z0[i]) << std::fixed << "\n";
406 #endif // DEBUG
407         if (fabsf(z[i] - z0[i]) > std::numeric_limits<float>::epsilon())
408         {
409             std::cerr <<
410                 "+ test_axpy_on_gpu: failure at index " << i <<
411                 ", expected " << z0[i] << ", but got " << z[i] << "\n";
412             return false;
413         }
414     }
415 
416     return true;
417 }
418 
419 
420 bool
gpu_is_ok(cl::Context * a_context)421 gpu_is_ok(cl::Context* a_context)
422 {
423     return test_axpy_on_gpu(a_context);
424 }
425 
426 #endif // OPENCL
427