1 /* example2 - Matrix transpose example from OpenCL specification.
2
3 Copyright (c) 2011 Universidad Rey Juan Carlos
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <CL/opencl.h>
28 #include "poclu.h"
29
30 #ifdef _WIN32
31 # include "vccompat.hpp"
32 #endif
33
34 #define WIDTH 256
35 #define HEIGHT 4096
36 #define PADDING 32
37
38 int
main(int argc,char ** argv)39 main (int argc, char **argv)
40 {
41 cl_float *input = NULL, *output = NULL;
42 int i, j, err, spir, spirv, poclbin;
43 cl_mem memobjs[2] = { 0 };
44 size_t global_work_size[2] = { 0 };
45 size_t local_work_size[2] = { 0 };
46
47 cl_context context = NULL;
48 cl_device_id device = NULL;
49 cl_platform_id platform = NULL;
50 cl_command_queue queue = NULL;
51 cl_program program = NULL;
52 cl_kernel kernel = NULL;
53
54 err = poclu_get_any_device2 (&context, &device, &queue, &platform);
55 CHECK_OPENCL_ERROR_IN ("clCreateContext");
56
57 spir = (argc > 1 && argv[1][0] == 's');
58 spirv = (argc > 1 && argv[1][0] == 'v');
59 poclbin = (argc > 1 && argv[1][0] == 'b');
60 const char *explicit_binary_path = (poclbin && (argc > 2)) ? argv[2] : NULL;
61
62 const char *basename = "example2";
63 err = poclu_load_program (context, device, basename, spir, spirv, poclbin,
64 explicit_binary_path, NULL, &program);
65 if (err != CL_SUCCESS)
66 goto ERROR;
67
68 input = (cl_float *) malloc (WIDTH * HEIGHT * sizeof (cl_float));
69 output = (cl_float *) malloc (WIDTH * (HEIGHT + PADDING) * sizeof (cl_float));
70
71 srand48(0);
72 for (i = 0; i < WIDTH; ++i)
73 {
74 for (j = 0; j < HEIGHT; ++j)
75 input[i * HEIGHT + j] = (cl_float)drand48();
76 for (j = 0; j < (HEIGHT + PADDING); ++j)
77 output[i * (HEIGHT + PADDING) + j] = 0.0f;
78 }
79
80 CHECK_CL_ERROR2 (err);
81
82 memobjs[0] = clCreateBuffer(context,
83 CL_MEM_READ_WRITE,
84 sizeof(cl_float) * WIDTH * (HEIGHT + PADDING), NULL, NULL);
85 CHECK_CL_ERROR2 (err);
86
87 memobjs[1] = clCreateBuffer(context,
88 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
89 sizeof(cl_float) * WIDTH * HEIGHT, input, NULL);
90 CHECK_CL_ERROR2 (err);
91
92 kernel = clCreateKernel(program, "matrix_transpose", NULL);
93 CHECK_CL_ERROR2 (err);
94
95 err = clSetKernelArg (kernel, 0, sizeof (cl_mem), (void *)&memobjs[0]);
96 CHECK_CL_ERROR2 (err);
97
98 err = clSetKernelArg (kernel, 1, sizeof (cl_mem), (void *)&memobjs[1]);
99 CHECK_CL_ERROR2 (err);
100
101 err = clSetKernelArg (kernel, 2, (32 + 1) * 32 * sizeof (float), NULL);
102 CHECK_CL_ERROR2 (err);
103
104 global_work_size[0] = 2 * WIDTH;
105 global_work_size[1] = HEIGHT / 32;
106 local_work_size[0]= 64;
107 local_work_size[1]= 1;
108
109 err = clEnqueueNDRangeKernel (queue, kernel, 2, NULL, global_work_size,
110 local_work_size, 0, NULL, NULL);
111
112 CHECK_CL_ERROR2 (err);
113
114 err = clEnqueueReadBuffer (queue, memobjs[0], CL_TRUE, 0,
115 WIDTH * (HEIGHT + PADDING) * sizeof (cl_float),
116 output, 0, NULL, NULL);
117 CHECK_CL_ERROR2 (err);
118
119 err = clFinish (queue);
120 CHECK_CL_ERROR2 (err);
121
122 for (i = 0; i < HEIGHT; ++i)
123 {
124 for (j = 0; j < WIDTH; ++j) {
125 if (input[i * WIDTH + j] != output[j * (HEIGHT + PADDING) + i]) {
126 printf ("FAIL\n");
127 err = 1;
128 goto ERROR;
129 }
130 }
131 }
132
133 printf ("OK\n");
134
135 ERROR:
136 CHECK_CL_ERROR (clReleaseMemObject (memobjs[0]));
137 CHECK_CL_ERROR (clReleaseMemObject (memobjs[1]));
138 CHECK_CL_ERROR (clReleaseKernel (kernel));
139 CHECK_CL_ERROR (clReleaseProgram (program));
140 CHECK_CL_ERROR (clReleaseCommandQueue (queue));
141 CHECK_CL_ERROR (clReleaseContext (context));
142 CHECK_CL_ERROR (clUnloadPlatformCompiler (platform));
143 free (input);
144 free (output);
145
146 return err;
147 }
148