1 /*
2 Copyright (c) 2018 Michal Babej / Tampere University
3
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to
6 deal in the Software without restriction, including without limitation the
7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 sell copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 IN THE SOFTWARE.
21 */
22
23 #include <assert.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26
27 #include <CL/cl.h>
28 #include <poclu.h>
29
30 /*
31 Multi-device migration test. Creates two buffers (in & out),
32 enqueues the same kernel with different parameters on every device from 0 to
33 N-1, then does the same in opposite direction (N-1 to 0th device). Verifies
34 that pocl properly migrates the buffer contents across devices.
35 */
36
37 #define ITEMS 1024
38
39 int
main(int argc,char ** argv)40 main (int argc, char **argv)
41 {
42 cl_float *input = NULL, *output = NULL;
43 int err, total_err, spir, spirv;
44 cl_mem buf_in, buf_out;
45 size_t global_work_size[2] = { 0 };
46 size_t local_work_size[2] = { 0 };
47
48 cl_platform_id platform = NULL;
49 cl_context context = NULL;
50 cl_device_id *devices = NULL;
51 cl_command_queue *queues = NULL;
52 cl_uint i, j, num_devices = 0;
53 cl_program program = NULL;
54 cl_kernel kernel = NULL;
55
56 err = poclu_get_multiple_devices (&platform, &context, &num_devices,
57 &devices, &queues);
58 CHECK_OPENCL_ERROR_IN ("poclu_get_multiple_devices");
59
60 printf ("NUM DEVICES: %u \n", num_devices);
61
62 if (num_devices < 2)
63 {
64 printf ("NOT ENOUGH DEVICES! (need 2)\n");
65 exit(0);
66 }
67
68 const char *basename = "migration_test";
69 err = poclu_load_program_multidev (context, devices, num_devices, basename,
70 0, 0, 0, NULL, NULL, &program);
71 if (err != CL_SUCCESS)
72 goto ERROR;
73
74 kernel = clCreateKernel (program, basename, NULL);
75 CHECK_CL_ERROR2 (err);
76
77 cl_uint num_floats = num_devices * ITEMS;
78 input = (cl_float *)malloc (num_floats * sizeof (cl_float));
79 output = (cl_float *)calloc (num_floats, sizeof (cl_float));
80
81 srand48 (0);
82 for (i = 0; i < num_floats; ++i)
83 {
84 input[i] = (cl_float)drand48 ();
85 }
86
87 buf_in = clCreateBuffer (context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
88 sizeof (cl_float) * num_floats, input, NULL);
89 CHECK_CL_ERROR2 (err);
90
91 buf_out = clCreateBuffer (context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
92 sizeof (cl_float) * num_floats, output, NULL);
93 CHECK_CL_ERROR2 (err);
94
95 err = clSetKernelArg (kernel, 0, sizeof (cl_mem), (void *)&buf_in);
96 CHECK_CL_ERROR2 (err);
97
98 err = clSetKernelArg (kernel, 1, sizeof (cl_mem), (void *)&buf_out);
99 CHECK_CL_ERROR2 (err);
100
101 cl_uint num_items = ITEMS;
102 err = clSetKernelArg (kernel, 2, sizeof (cl_uint), &num_items);
103 CHECK_CL_ERROR2 (err);
104
105 global_work_size[0] = ITEMS;
106 local_work_size[0] = 64;
107
108 fprintf (stderr, "FORWARD \n");
109
110 for (i = 0; i < num_devices; ++i)
111 {
112 uint32_t index_arg = i;
113 fprintf (stderr, "index ARG: %u\n", index_arg);
114 err = clSetKernelArg (kernel, 3, sizeof (uint32_t), &index_arg);
115 CHECK_CL_ERROR2 (err);
116
117 err = clEnqueueNDRangeKernel (queues[i], kernel, 1, NULL,
118 global_work_size, local_work_size, 0, NULL,
119 NULL);
120
121 CHECK_CL_ERROR2 (err);
122 }
123
124 clFinish (queues[num_devices - 1]);
125 fprintf (stderr, "NOW REVERSE \n");
126
127 for (i = num_devices; i > 0; --i)
128 {
129 uint32_t index_arg = i - 1;
130 fprintf (stderr, "index ARG: %u\n", index_arg);
131 err = clSetKernelArg (kernel, 3, sizeof (uint32_t), &index_arg);
132 CHECK_CL_ERROR2 (err);
133
134 err = clEnqueueNDRangeKernel (queues[i - 1], kernel, 1, NULL,
135 global_work_size, local_work_size, 0, NULL,
136 NULL);
137
138 CHECK_CL_ERROR2 (err);
139 }
140
141 err = clEnqueueReadBuffer (queues[0], buf_out, CL_TRUE, 0,
142 num_floats * sizeof (cl_float), output, 0, NULL,
143 NULL);
144 CHECK_CL_ERROR2 (err);
145 fprintf (stderr, "DONE \n");
146
147 total_err = 0;
148 for (i = 0; i < num_devices; ++i)
149 {
150 err = 0;
151 for (j = 0; j < ITEMS; ++j)
152 {
153 cl_float actual = output[i * ITEMS + j];
154 cl_float expected = input[i * ITEMS + j] * (float)(i + 1) * 2.0f;
155 if (expected != actual)
156 {
157 if (err < 10)
158 printf ("FAIL at DEV %u ITEM %u: EXPECTED %e ACTUAL %e\n", i,
159 j, expected, actual);
160 err += 1;
161 total_err += 1;
162 }
163 }
164 if (err > 0)
165 printf ("DEV %u FAILED: %i errs\n", i, err);
166 else
167 printf ("DEV %u PASS\n", i);
168 }
169 if (total_err == 0)
170 printf ("OK\n");
171 else
172 printf ("FAIL\n");
173
174 ERROR:
175 CHECK_CL_ERROR (clReleaseMemObject (buf_in));
176 CHECK_CL_ERROR (clReleaseMemObject (buf_out));
177
178 for (i = 0; i < num_devices; ++i)
179 {
180 CHECK_CL_ERROR (clReleaseCommandQueue (queues[i]));
181 }
182
183 CHECK_CL_ERROR (clReleaseKernel (kernel));
184 CHECK_CL_ERROR (clReleaseProgram (program));
185 CHECK_CL_ERROR (clReleaseContext (context));
186 CHECK_CL_ERROR (clUnloadPlatformCompiler (platform));
187 free (input);
188 free (output);
189 free (devices);
190 free (queues);
191
192 return err;
193 }
194