1 /*
2   Copyright (c) 2018 Michal Babej / Tampere University
3 
4    Permission is hereby granted, free of charge, to any person obtaining a copy
5    of this software and associated documentation files (the "Software"), to
6   deal in the Software without restriction, including without limitation the
7   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8   sell copies of the Software, and to permit persons to whom the Software is
9    furnished to do so, subject to the following conditions:
10 
11    The above copyright notice and this permission notice shall be included in
12    all copies or substantial portions of the Software.
13 
14    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20   IN THE SOFTWARE.
21 */
22 
23 #include <assert.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 
27 #include <CL/cl.h>
28 #include <poclu.h>
29 
30 /*
31   Multi-device migration test. Creates two buffers (in & out),
32   enqueues the same kernel with different parameters on every device from 0 to
33   N-1, then does the same in opposite direction (N-1 to 0th device). Verifies
34   that pocl properly migrates the buffer contents across devices.
35 */
36 
37 #define ITEMS 1024
38 
39 int
main(int argc,char ** argv)40 main (int argc, char **argv)
41 {
42   cl_float *input = NULL, *output = NULL;
43   int err, total_err, spir, spirv;
44   cl_mem buf_in, buf_out;
45   size_t global_work_size[2] = { 0 };
46   size_t local_work_size[2] = { 0 };
47 
48   cl_platform_id platform = NULL;
49   cl_context context = NULL;
50   cl_device_id *devices = NULL;
51   cl_command_queue *queues = NULL;
52   cl_uint i, j, num_devices = 0;
53   cl_program program = NULL;
54   cl_kernel kernel = NULL;
55 
56   err = poclu_get_multiple_devices (&platform, &context, &num_devices,
57                                     &devices, &queues);
58   CHECK_OPENCL_ERROR_IN ("poclu_get_multiple_devices");
59 
60   printf ("NUM DEVICES: %u \n", num_devices);
61 
62   if (num_devices < 2)
63     {
64       printf ("NOT ENOUGH DEVICES! (need 2)\n");
65       exit(0);
66     }
67 
68   const char *basename = "migration_test";
69   err = poclu_load_program_multidev (context, devices, num_devices, basename,
70                                      0, 0, 0, NULL, NULL, &program);
71   if (err != CL_SUCCESS)
72     goto ERROR;
73 
74   kernel = clCreateKernel (program, basename, NULL);
75   CHECK_CL_ERROR2 (err);
76 
77   cl_uint num_floats = num_devices * ITEMS;
78   input = (cl_float *)malloc (num_floats * sizeof (cl_float));
79   output = (cl_float *)calloc (num_floats, sizeof (cl_float));
80 
81   srand48 (0);
82   for (i = 0; i < num_floats; ++i)
83     {
84       input[i] = (cl_float)drand48 ();
85     }
86 
87   buf_in = clCreateBuffer (context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
88                            sizeof (cl_float) * num_floats, input, NULL);
89   CHECK_CL_ERROR2 (err);
90 
91   buf_out = clCreateBuffer (context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
92                             sizeof (cl_float) * num_floats, output, NULL);
93   CHECK_CL_ERROR2 (err);
94 
95   err = clSetKernelArg (kernel, 0, sizeof (cl_mem), (void *)&buf_in);
96   CHECK_CL_ERROR2 (err);
97 
98   err = clSetKernelArg (kernel, 1, sizeof (cl_mem), (void *)&buf_out);
99   CHECK_CL_ERROR2 (err);
100 
101   cl_uint num_items = ITEMS;
102   err = clSetKernelArg (kernel, 2, sizeof (cl_uint), &num_items);
103   CHECK_CL_ERROR2 (err);
104 
105   global_work_size[0] = ITEMS;
106   local_work_size[0] = 64;
107 
108   fprintf (stderr, "FORWARD \n");
109 
110   for (i = 0; i < num_devices; ++i)
111     {
112       uint32_t index_arg = i;
113       fprintf (stderr, "index ARG: %u\n", index_arg);
114       err = clSetKernelArg (kernel, 3, sizeof (uint32_t), &index_arg);
115       CHECK_CL_ERROR2 (err);
116 
117       err = clEnqueueNDRangeKernel (queues[i], kernel, 1, NULL,
118                                     global_work_size, local_work_size, 0, NULL,
119                                     NULL);
120 
121       CHECK_CL_ERROR2 (err);
122     }
123 
124   clFinish (queues[num_devices - 1]);
125   fprintf (stderr, "NOW REVERSE \n");
126 
127   for (i = num_devices; i > 0; --i)
128     {
129       uint32_t index_arg = i - 1;
130       fprintf (stderr, "index ARG: %u\n", index_arg);
131       err = clSetKernelArg (kernel, 3, sizeof (uint32_t), &index_arg);
132       CHECK_CL_ERROR2 (err);
133 
134       err = clEnqueueNDRangeKernel (queues[i - 1], kernel, 1, NULL,
135                                     global_work_size, local_work_size, 0, NULL,
136                                     NULL);
137 
138       CHECK_CL_ERROR2 (err);
139     }
140 
141   err = clEnqueueReadBuffer (queues[0], buf_out, CL_TRUE, 0,
142                              num_floats * sizeof (cl_float), output, 0, NULL,
143                              NULL);
144   CHECK_CL_ERROR2 (err);
145   fprintf (stderr, "DONE \n");
146 
147   total_err = 0;
148   for (i = 0; i < num_devices; ++i)
149     {
150       err = 0;
151       for (j = 0; j < ITEMS; ++j)
152         {
153           cl_float actual = output[i * ITEMS + j];
154           cl_float expected = input[i * ITEMS + j] * (float)(i + 1) * 2.0f;
155           if (expected != actual)
156             {
157               if (err < 10)
158                 printf ("FAIL at DEV %u ITEM %u: EXPECTED %e ACTUAL %e\n", i,
159                         j, expected, actual);
160               err += 1;
161               total_err += 1;
162             }
163         }
164       if (err > 0)
165         printf ("DEV %u FAILED: %i errs\n", i, err);
166       else
167         printf ("DEV %u PASS\n", i);
168     }
169   if (total_err == 0)
170     printf ("OK\n");
171   else
172     printf ("FAIL\n");
173 
174 ERROR:
175   CHECK_CL_ERROR (clReleaseMemObject (buf_in));
176   CHECK_CL_ERROR (clReleaseMemObject (buf_out));
177 
178   for (i = 0; i < num_devices; ++i)
179     {
180       CHECK_CL_ERROR (clReleaseCommandQueue (queues[i]));
181     }
182 
183   CHECK_CL_ERROR (clReleaseKernel (kernel));
184   CHECK_CL_ERROR (clReleaseProgram (program));
185   CHECK_CL_ERROR (clReleaseContext (context));
186   CHECK_CL_ERROR (clUnloadPlatformCompiler (platform));
187   free (input);
188   free (output);
189   free (devices);
190   free (queues);
191 
192   return err;
193 }
194