1 /**
2 * FreeRDP: A Remote Desktop Protocol Implementation
3 * Optimized YUV/RGB conversion operations using openCL
4 *
5 * Copyright 2019 David Fort <contact@hardening-consulting.com>
6 * Copyright 2019 Rangee Gmbh
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24
25 #include <freerdp/types.h>
26 #include <freerdp/primitives.h>
27 #include "prim_internal.h"
28
29 #if defined(WITH_OPENCL)
30 #ifdef __APPLE__
31 #include "OpenCL/opencl.h"
32 #else
33 #include <CL/cl.h>
34 #endif
35 #endif
36
37 #define TAG FREERDP_TAG("primitives")
38
39 typedef struct
40 {
41 BOOL support;
42 cl_platform_id platformId;
43 cl_device_id deviceId;
44 cl_context context;
45 cl_command_queue commandQueue;
46 cl_program program;
47 } primitives_opencl_context;
48
49 static primitives_opencl_context* primitives_get_opencl_context(void);
50
opencl_YUVToRGB(const char * kernelName,const BYTE * const pSrc[3],const UINT32 srcStep[3],BYTE * pDst,UINT32 dstStep,const prim_size_t * roi)51 static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* const pSrc[3],
52 const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep,
53 const prim_size_t* roi)
54 {
55 cl_int ret;
56 cl_uint i;
57 cl_mem objs[3] = { NULL, NULL, NULL };
58 cl_mem destObj;
59 cl_kernel kernel;
60 size_t indexes[2];
61 const char* sourceNames[] = { "Y", "U", "V" };
62 primitives_opencl_context* cl = primitives_get_opencl_context();
63
64 kernel = clCreateKernel(cl->program, kernelName, &ret);
65 if (ret != CL_SUCCESS)
66 {
67 WLog_ERR(TAG, "openCL: unable to create kernel %s", kernelName);
68 return -1;
69 }
70
71 for (i = 0; i < 3; i++)
72 {
73 objs[i] = clCreateBuffer(cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
74 srcStep[i] * roi->height, (char*)pSrc[i], &ret);
75 if (ret != CL_SUCCESS)
76 {
77 WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]);
78 goto error_objs;
79 }
80 }
81
82 destObj = clCreateBuffer(cl->context, CL_MEM_WRITE_ONLY, dstStep * roi->height, NULL, &ret);
83 if (ret != CL_SUCCESS)
84 {
85 WLog_ERR(TAG, "unable to create dest obj");
86 goto error_objs;
87 }
88
89 /* push source + stride arguments*/
90 for (i = 0; i < 3; i++)
91 {
92 ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), &objs[i]);
93 if (ret != CL_SUCCESS)
94 {
95 WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]);
96 goto error_set_args;
97 }
98
99 ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), &srcStep[i]);
100 if (ret != CL_SUCCESS)
101 {
102 WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]);
103 goto error_set_args;
104 }
105 }
106
107 ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), &destObj);
108 if (ret != CL_SUCCESS)
109 {
110 WLog_ERR(TAG, "unable to set arg destObj");
111 goto error_set_args;
112 }
113
114 ret = clSetKernelArg(kernel, 7, sizeof(cl_int), &dstStep);
115 if (ret != CL_SUCCESS)
116 {
117 WLog_ERR(TAG, "unable to set arg dstStep");
118 goto error_set_args;
119 }
120
121 indexes[0] = roi->width;
122 indexes[1] = roi->height;
123 ret = clEnqueueNDRangeKernel(cl->commandQueue, kernel, 2, NULL, indexes, NULL, 0, NULL, NULL);
124 if (ret != CL_SUCCESS)
125 {
126 WLog_ERR(TAG, "unable to enqueue call kernel");
127 goto error_set_args;
128 }
129
130 /* Transfer result to host */
131 ret = clEnqueueReadBuffer(cl->commandQueue, destObj, CL_TRUE, 0, roi->height * dstStep, pDst, 0,
132 NULL, NULL);
133 if (ret != CL_SUCCESS)
134 {
135 WLog_ERR(TAG, "unable to read back buffer");
136 goto error_set_args;
137 }
138
139 /* cleanup things */
140 clReleaseMemObject(destObj);
141 for (i = 0; i < 3; i++)
142 if (objs[i])
143 clReleaseMemObject(objs[i]);
144 clReleaseKernel(kernel);
145
146 return PRIMITIVES_SUCCESS;
147
148 error_set_args:
149 clReleaseMemObject(destObj);
150 error_objs:
151 for (i = 0; i < 3; i++)
152 {
153 if (objs[i])
154 clReleaseMemObject(objs[i]);
155 }
156 clReleaseKernel(kernel);
157 return -1;
158 }
159
160 static primitives_opencl_context openclContext;
161
primitives_get_opencl_context(void)162 static primitives_opencl_context* primitives_get_opencl_context(void)
163 {
164 return &openclContext;
165 }
166
primitives_uninit_opencl(void)167 static pstatus_t primitives_uninit_opencl(void)
168 {
169 if (!openclContext.support)
170 return PRIMITIVES_SUCCESS;
171
172 clReleaseProgram(openclContext.program);
173 clReleaseCommandQueue(openclContext.commandQueue);
174 clReleaseContext(openclContext.context);
175 clReleaseDevice(openclContext.deviceId);
176 openclContext.support = FALSE;
177 return PRIMITIVES_SUCCESS;
178 }
179
180 static const char* openclProgram =
181 #include "primitives.cl"
182 ;
183
primitives_init_opencl_context(primitives_opencl_context * cl)184 static BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
185 {
186 cl_platform_id* platform_ids = NULL;
187 cl_uint ndevices, nplatforms, i;
188 cl_kernel kernel;
189 cl_int ret;
190
191 BOOL gotGPU = FALSE;
192 size_t programLen;
193
194 ret = clGetPlatformIDs(0, NULL, &nplatforms);
195 if (ret != CL_SUCCESS || nplatforms < 1)
196 return FALSE;
197
198 platform_ids = calloc(nplatforms, sizeof(*platform_ids));
199 if (!platform_ids)
200 return FALSE;
201
202 ret = clGetPlatformIDs(nplatforms, platform_ids, &nplatforms);
203 if (ret != CL_SUCCESS)
204 {
205 free(platform_ids);
206 return FALSE;
207 }
208
209 for (i = 0; (i < nplatforms) && !gotGPU; i++)
210 {
211 cl_device_id device_id;
212 cl_context context;
213 char platformName[1000];
214 char deviceName[1000];
215
216 ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(platformName),
217 platformName, NULL);
218 if (ret != CL_SUCCESS)
219 continue;
220
221 ret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_GPU, 1, &device_id, &ndevices);
222 if (ret != CL_SUCCESS)
223 continue;
224
225 ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
226 if (ret != CL_SUCCESS)
227 {
228 WLog_ERR(TAG, "openCL: unable get device name for platform %s", platformName);
229 clReleaseDevice(device_id);
230 continue;
231 }
232
233 context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
234 if (ret != CL_SUCCESS)
235 {
236 WLog_ERR(TAG, "openCL: unable to create context for platform %s, device %s",
237 platformName, deviceName);
238 clReleaseDevice(device_id);
239 continue;
240 }
241
242 cl->commandQueue = clCreateCommandQueue(context, device_id, 0, &ret);
243 if (ret != CL_SUCCESS)
244 {
245 WLog_ERR(TAG, "openCL: unable to create command queue");
246 clReleaseContext(context);
247 clReleaseDevice(device_id);
248 continue;
249 }
250
251 WLog_INFO(TAG, "openCL: using platform=%s device=%s", platformName, deviceName);
252
253 cl->platformId = platform_ids[i];
254 cl->deviceId = device_id;
255 cl->context = context;
256 gotGPU = TRUE;
257 }
258
259 free(platform_ids);
260
261 if (!gotGPU)
262 {
263 WLog_ERR(TAG, "openCL: no GPU found");
264 return FALSE;
265 }
266
267 programLen = strlen(openclProgram);
268 cl->program =
269 clCreateProgramWithSource(cl->context, 1, (const char**)&openclProgram, &programLen, &ret);
270 if (ret != CL_SUCCESS)
271 {
272 WLog_ERR(TAG, "openCL: unable to create program");
273 goto out_program_create;
274 }
275
276 ret = clBuildProgram(cl->program, 1, &cl->deviceId, NULL, NULL, NULL);
277 if (ret != CL_SUCCESS)
278 {
279 size_t length;
280 char buffer[2048];
281 ret = clGetProgramBuildInfo(cl->program, cl->deviceId, CL_PROGRAM_BUILD_LOG, sizeof(buffer),
282 buffer, &length);
283 if (ret != CL_SUCCESS)
284 {
285 WLog_ERR(TAG,
286 "openCL: building program failed but unable to retrieve buildLog, error=%d",
287 ret);
288 }
289 else
290 {
291 WLog_ERR(TAG, "openCL: unable to build program, errorLog=%s", buffer);
292 }
293 goto out_program_build;
294 }
295
296 kernel = clCreateKernel(cl->program, "yuv420_to_bgra_1b", &ret);
297 if (ret != CL_SUCCESS)
298 {
299 WLog_ERR(TAG, "openCL: unable to create yuv420_to_bgra_1b kernel");
300 goto out_program_build;
301 }
302 clReleaseKernel(kernel);
303
304 cl->support = TRUE;
305 return TRUE;
306
307 out_program_build:
308 clReleaseProgram(cl->program);
309 out_program_create:
310 clReleaseCommandQueue(cl->commandQueue);
311 clReleaseContext(cl->context);
312 clReleaseDevice(cl->deviceId);
313 return FALSE;
314 }
315
opencl_YUV420ToRGB_8u_P3AC4R(const BYTE * const pSrc[3],const UINT32 srcStep[3],BYTE * pDst,UINT32 dstStep,UINT32 DstFormat,const prim_size_t * roi)316 static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3],
317 BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
318 const prim_size_t* roi)
319 {
320 const char* kernel_name;
321
322 switch (DstFormat)
323 {
324 case PIXEL_FORMAT_BGRA32:
325 case PIXEL_FORMAT_BGRX32:
326 kernel_name = "yuv420_to_bgra_1b";
327 break;
328 case PIXEL_FORMAT_XRGB32:
329 case PIXEL_FORMAT_ARGB32:
330 kernel_name = "yuv420_to_argb_1b";
331 break;
332 default:
333 {
334 primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
335 if (!p)
336 return -1;
337 return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
338 }
339 }
340
341 return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
342 }
343
opencl_YUV444ToRGB_8u_P3AC4R(const BYTE * const pSrc[3],const UINT32 srcStep[3],BYTE * pDst,UINT32 dstStep,UINT32 DstFormat,const prim_size_t * roi)344 static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3],
345 BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
346 const prim_size_t* roi)
347 {
348 const char* kernel_name;
349
350 switch (DstFormat)
351 {
352 case PIXEL_FORMAT_BGRA32:
353 case PIXEL_FORMAT_BGRX32:
354 kernel_name = "yuv444_to_bgra_1b";
355 break;
356 case PIXEL_FORMAT_XRGB32:
357 case PIXEL_FORMAT_ARGB32:
358 kernel_name = "yuv444_to_argb_1b";
359 break;
360 default:
361 {
362 primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
363 if (!p)
364 return -1;
365 return p->YUV444ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
366 }
367 }
368
369 return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
370 }
371
primitives_init_opencl(primitives_t * prims)372 BOOL primitives_init_opencl(primitives_t* prims)
373 {
374 primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
375 if (!prims || !p)
376 return FALSE;
377 *prims = *p;
378
379 if (!primitives_init_opencl_context(&openclContext))
380 return FALSE;
381
382 prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
383 prims->YUV444ToRGB_8u_P3AC4R = opencl_YUV444ToRGB_8u_P3AC4R;
384 prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
385 prims->uninit = primitives_uninit_opencl;
386 return TRUE;
387 }
388