1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10 
11 #define BOOST_TEST_MODULE TestCommandQueue
12 #include <boost/test/unit_test.hpp>
13 
14 #include <iostream>
15 
16 #include <boost/compute/kernel.hpp>
17 #include <boost/compute/system.hpp>
18 #include <boost/compute/program.hpp>
19 #include <boost/compute/command_queue.hpp>
20 #include <boost/compute/algorithm/fill.hpp>
21 #include <boost/compute/container/vector.hpp>
22 #include <boost/compute/utility/dim.hpp>
23 #include <boost/compute/utility/source.hpp>
24 #include <boost/compute/detail/diagnostic.hpp>
25 
26 #include "check_macros.hpp"
27 #include "context_setup.hpp"
28 
29 namespace bc = boost::compute;
30 namespace compute = boost::compute;
31 
BOOST_AUTO_TEST_CASE(get_context)32 BOOST_AUTO_TEST_CASE(get_context)
33 {
34     BOOST_VERIFY(queue.get_context() == context);
35     BOOST_VERIFY(queue.get_info<CL_QUEUE_CONTEXT>() == context.get());
36 }
37 
BOOST_AUTO_TEST_CASE(get_device)38 BOOST_AUTO_TEST_CASE(get_device)
39 {
40     BOOST_VERIFY(queue.get_info<CL_QUEUE_DEVICE>() == device.get());
41 }
42 
BOOST_AUTO_TEST_CASE(equality_operator)43 BOOST_AUTO_TEST_CASE(equality_operator)
44 {
45     compute::command_queue queue1(context, device);
46     BOOST_CHECK(queue1 == queue1);
47 
48     compute::command_queue queue2 = queue1;
49     BOOST_CHECK(queue1 == queue2);
50 
51     compute::command_queue queue3(context, device);
52     BOOST_CHECK(queue1 != queue3);
53 }
54 
BOOST_AUTO_TEST_CASE(event_profiling)55 BOOST_AUTO_TEST_CASE(event_profiling)
56 {
57     bc::command_queue queue(context, device, bc::command_queue::enable_profiling);
58 
59     int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
60     bc::buffer buffer(context, sizeof(data));
61 
62     bc::event event =
63         queue.enqueue_write_buffer_async(buffer,
64                                          0,
65                                          sizeof(data),
66                                          static_cast<const void *>(data));
67     queue.finish();
68 
69     event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
70     event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
71     event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
72     event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);
73 }
74 
BOOST_AUTO_TEST_CASE(kernel_profiling)75 BOOST_AUTO_TEST_CASE(kernel_profiling)
76 {
77     // create queue with profiling enabled
78     boost::compute::command_queue queue(
79         context, device, boost::compute::command_queue::enable_profiling
80     );
81 
82     // input data
83     int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
84     boost::compute::buffer buffer(context, sizeof(data));
85 
86     // copy input data to device
87     queue.enqueue_write_buffer(buffer, 0, sizeof(data), data);
88 
89     // setup kernel
90     const char source[] =
91         "__kernel void iscal(__global int *buffer, int alpha)\n"
92         "{\n"
93         "    buffer[get_global_id(0)] *= alpha;\n"
94         "}\n";
95 
96     boost::compute::program program =
97         boost::compute::program::create_with_source(source, context);
98     program.build();
99 
100     boost::compute::kernel kernel(program, "iscal");
101     kernel.set_arg(0, buffer);
102     kernel.set_arg(1, 2);
103 
104     // execute kernel
105     size_t global_work_offset = 0;
106     size_t global_work_size = 8;
107 
108     boost::compute::event event =
109         queue.enqueue_nd_range_kernel(kernel,
110                                       size_t(1),
111                                       &global_work_offset,
112                                       &global_work_size,
113                                       0);
114 
115     // wait until kernel is finished
116     event.wait();
117 
118     // check profiling information
119     event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
120     event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
121     event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
122     event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);
123 
124     // read results back to host
125     queue.enqueue_read_buffer(buffer, 0, sizeof(data), data);
126 
127     // check results
128     BOOST_CHECK_EQUAL(data[0], 2);
129     BOOST_CHECK_EQUAL(data[1], 4);
130     BOOST_CHECK_EQUAL(data[2], 6);
131     BOOST_CHECK_EQUAL(data[3], 8);
132     BOOST_CHECK_EQUAL(data[4], 10);
133     BOOST_CHECK_EQUAL(data[5], 12);
134     BOOST_CHECK_EQUAL(data[6], 14);
135     BOOST_CHECK_EQUAL(data[7], 16);
136 }
137 
BOOST_AUTO_TEST_CASE(construct_from_cl_command_queue)138 BOOST_AUTO_TEST_CASE(construct_from_cl_command_queue)
139 {
140     // create cl_command_queue
141     cl_command_queue cl_queue;
142 #ifdef BOOST_COMPUTE_CL_VERSION_2_0
143     if (device.check_version(2, 0)){ // runtime check
144         cl_queue =
145             clCreateCommandQueueWithProperties(context, device.id(), 0, 0);
146     } else
147 #endif // BOOST_COMPUTE_CL_VERSION_2_0
148     {
149         // Suppress deprecated declarations warning
150         BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
151         cl_queue =
152             clCreateCommandQueue(context, device.id(), 0, 0);
153         BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
154     }
155     BOOST_VERIFY(cl_queue);
156 
157     // create boost::compute::command_queue
158     boost::compute::command_queue queue(cl_queue);
159 
160     // check queue
161     BOOST_CHECK(queue.get_context() == context);
162     BOOST_CHECK(cl_command_queue(queue) == cl_queue);
163 
164     // cleanup cl_command_queue
165     clReleaseCommandQueue(cl_queue);
166 }
167 
168 #ifdef BOOST_COMPUTE_CL_VERSION_1_1
BOOST_AUTO_TEST_CASE(write_buffer_rect)169 BOOST_AUTO_TEST_CASE(write_buffer_rect)
170 {
171     REQUIRES_OPENCL_VERSION(1, 1);
172 
173     // skip this test on AMD GPUs due to a buggy implementation
174     // of the clEnqueueWriteBufferRect() function
175     if(device.vendor() == "Advanced Micro Devices, Inc." &&
176        device.type() & boost::compute::device::gpu){
177         std::cerr << "skipping write_buffer_rect test on AMD GPU" << std::endl;
178         return;
179     }
180 
181     int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
182     boost::compute::buffer buffer(context, 8 * sizeof(int));
183 
184     // copy every other value to the buffer
185     size_t buffer_origin[] = { 0, 0, 0 };
186     size_t host_origin[] = { 0, 0, 0 };
187     size_t region[] = { sizeof(int), sizeof(int), 1 };
188 
189     queue.enqueue_write_buffer_rect(
190         buffer,
191         buffer_origin,
192         host_origin,
193         region,
194         sizeof(int),
195         0,
196         2 * sizeof(int),
197         0,
198         data
199     );
200 
201     // check output values
202     int output[4];
203     queue.enqueue_read_buffer(buffer, 0, 4 * sizeof(int), output);
204     BOOST_CHECK_EQUAL(output[0], 1);
205     BOOST_CHECK_EQUAL(output[1], 3);
206     BOOST_CHECK_EQUAL(output[2], 5);
207     BOOST_CHECK_EQUAL(output[3], 7);
208 }
209 #endif // BOOST_COMPUTE_CL_VERSION_1_1
210 
211 static bool nullary_kernel_executed = false;
212 
nullary_kernel()213 static void nullary_kernel()
214 {
215     nullary_kernel_executed = true;
216 }
217 
BOOST_AUTO_TEST_CASE(native_kernel)218 BOOST_AUTO_TEST_CASE(native_kernel)
219 {
220     cl_device_exec_capabilities exec_capabilities =
221         device.get_info<CL_DEVICE_EXECUTION_CAPABILITIES>();
222     if(!(exec_capabilities & CL_EXEC_NATIVE_KERNEL)){
223         std::cerr << "skipping native_kernel test: "
224                   << "device does not support CL_EXEC_NATIVE_KERNEL"
225                   << std::endl;
226         return;
227     }
228 
229     compute::vector<int> vector(1000, context);
230     compute::fill(vector.begin(), vector.end(), 42, queue);
231     BOOST_CHECK_EQUAL(nullary_kernel_executed, false);
232     queue.enqueue_native_kernel(&nullary_kernel);
233     queue.finish();
234     BOOST_CHECK_EQUAL(nullary_kernel_executed, true);
235 }
236 
BOOST_AUTO_TEST_CASE(copy_with_wait_list)237 BOOST_AUTO_TEST_CASE(copy_with_wait_list)
238 {
239     int data1[] = { 1, 3, 5, 7 };
240     int data2[] = { 2, 4, 6, 8 };
241 
242     compute::buffer buf1(context, 4 * sizeof(int));
243     compute::buffer buf2(context, 4 * sizeof(int));
244 
245     compute::event write_event1 =
246         queue.enqueue_write_buffer_async(buf1, 0, buf1.size(), data1);
247 
248     compute::event write_event2 =
249         queue.enqueue_write_buffer_async(buf2, 0, buf2.size(), data2);
250 
251     compute::event read_event1 =
252         queue.enqueue_read_buffer_async(buf1, 0, buf1.size(), data2, write_event1);
253 
254     compute::event read_event2 =
255         queue.enqueue_read_buffer_async(buf2, 0, buf2.size(), data1, write_event2);
256 
257     read_event1.wait();
258     read_event2.wait();
259 
260     CHECK_HOST_RANGE_EQUAL(int, 4, data1, (2, 4, 6, 8));
261     CHECK_HOST_RANGE_EQUAL(int, 4, data2, (1, 3, 5, 7));
262 }
263 
264 #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
BOOST_AUTO_TEST_CASE(enqueue_kernel_with_extents)265 BOOST_AUTO_TEST_CASE(enqueue_kernel_with_extents)
266 {
267     using boost::compute::dim;
268     using boost::compute::uint_;
269 
270     const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
271         __kernel void foo(__global int *output1, __global int *output2)
272         {
273             output1[get_global_id(0)] = get_local_id(0);
274             output2[get_global_id(1)] = get_local_id(1);
275         }
276     );
277 
278     compute::kernel kernel =
279         compute::kernel::create_with_source(source, "foo", context);
280 
281     compute::vector<uint_> output1(4, context);
282     compute::vector<uint_> output2(4, context);
283 
284     kernel.set_arg(0, output1);
285     kernel.set_arg(1, output2);
286 
287     queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(1, 1));
288 
289     CHECK_RANGE_EQUAL(int, 4, output1, (0, 0, 0, 0));
290     CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));
291 
292     // Maximum number of work-items that can be specified in each
293     // dimension of the work-group to clEnqueueNDRangeKernel.
294     std::vector<size_t> max_work_item_sizes =
295         device.get_info<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
296 
297     if(max_work_item_sizes[0] < size_t(2)) {
298         return;
299     }
300 
301     queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 1));
302 
303     CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
304     CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));
305 
306     if(max_work_item_sizes[1] < size_t(2)) {
307         return;
308     }
309 
310     queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 2));
311 
312     CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
313     CHECK_RANGE_EQUAL(int, 4, output2, (0, 1, 0, 1));
314 }
315 #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
316 
317 #ifdef BOOST_COMPUTE_CL_VERSION_2_1
BOOST_AUTO_TEST_CASE(get_default_device_queue)318 BOOST_AUTO_TEST_CASE(get_default_device_queue)
319 {
320     REQUIRES_OPENCL_VERSION(2, 1);
321 
322     boost::compute::command_queue default_device_queue(
323         context, device,
324         boost::compute::command_queue::on_device |
325         boost::compute::command_queue::on_device_default |
326         boost::compute::command_queue::enable_out_of_order_execution
327     );
328     BOOST_CHECK_NO_THROW(queue.get_info<CL_QUEUE_DEVICE_DEFAULT>());
329     BOOST_CHECK_EQUAL(
330         queue.get_default_device_queue(),
331         default_device_queue
332     );
333 }
334 
BOOST_AUTO_TEST_CASE(set_as_default_device_queue)335 BOOST_AUTO_TEST_CASE(set_as_default_device_queue)
336 {
337     REQUIRES_OPENCL_VERSION(2, 1);
338 
339     boost::compute::command_queue new_default_device_queue(
340         context, device,
341         boost::compute::command_queue::on_device |
342         boost::compute::command_queue::enable_out_of_order_execution
343     );
344     new_default_device_queue.set_as_default_device_queue();
345     BOOST_CHECK_EQUAL(
346          queue.get_default_device_queue(),
347          new_default_device_queue
348     );
349 }
350 #endif
351 
352 BOOST_AUTO_TEST_SUITE_END()
353