1 /* This file is part of GEGL
2  *
3  * GEGL is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU Lesser General Public
5  * License as published by the Free Software Foundation; either
6  * version 3 of the License, or (at your option) any later version.
7  *
8  * GEGL is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * Lesser General Public License for more details.
12  *
13  * You should have received a copy of the GNU Lesser General Public
14  * License along with GEGL; if not, see <https://www.gnu.org/licenses/>.
15  *
16  * Copyright 2012 Victor Oliveira (victormatheus@gmail.com)
17  *           2013 Daniel Sabo
18  */
19 
20 /* OpenCL Initialization
21    The API is stubbed out so we detect if OpenCL libraries are available
22    in runtime.
23 */
24 
25 #include "config.h"
26 
27 #define __GEGL_CL_INIT_MAIN__
28 #include "gegl-cl-init.h"
29 #undef __GEGL_CL_INIT_MAIN__
30 
31 #include <glib.h>
32 #include <gmodule.h>
33 #include <string.h>
34 #include <stdio.h>
35 
36 #include "gegl-cl.h"
37 #include "gegl-cl-color.h"
38 #include "opencl/random.cl.h"
39 
40 #include "gegl/gegl-debug.h"
41 
42 #include "gegl/buffer/gegl-buffer-private.h"
43 #include "gegl-buffer-cl-cache.h"
44 
45 GQuark gegl_opencl_error_quark (void);
46 
47 GQuark
gegl_opencl_error_quark(void)48 gegl_opencl_error_quark (void)
49 {
50   return g_quark_from_static_string ("gegl-opencl-error-quark");
51 }
52 
53 #define GEGL_OPENCL_ERROR (gegl_opencl_error_quark ())
54 
gegl_cl_errstring(cl_int err)55 const char *gegl_cl_errstring(cl_int err) {
56   static const char* strings[] =
57   {
58     /* Error Codes */
59       "success"                         /*  0  */
60     , "device not found"                /* -1  */
61     , "device not available"            /* -2  */
62     , "compiler not available"          /* -3  */
63     , "mem object allocation failure"   /* -4  */
64     , "out of resources"                /* -5  */
65     , "out of host memory"              /* -6  */
66     , "profiling info not available"    /* -7  */
67     , "mem copy overlap"                /* -8  */
68     , "image format mismatch"           /* -9  */
69     , "image format not supported"      /* -10 */
70     , "build program failure"           /* -11 */
71     , "map failure"                     /* -12 */
72     , ""                                /* -13 */
73     , ""                                /* -14 */
74     , ""                                /* -15 */
75     , ""                                /* -16 */
76     , ""                                /* -17 */
77     , ""                                /* -18 */
78     , ""                                /* -19 */
79     , ""                                /* -20 */
80     , ""                                /* -21 */
81     , ""                                /* -22 */
82     , ""                                /* -23 */
83     , ""                                /* -24 */
84     , ""                                /* -25 */
85     , ""                                /* -26 */
86     , ""                                /* -27 */
87     , ""                                /* -28 */
88     , ""                                /* -29 */
89     , "invalid value"                   /* -30 */
90     , "invalid device type"             /* -31 */
91     , "invalid platform"                /* -32 */
92     , "invalid device"                  /* -33 */
93     , "invalid context"                 /* -34 */
94     , "invalid queue properties"        /* -35 */
95     , "invalid command queue"           /* -36 */
96     , "invalid host ptr"                /* -37 */
97     , "invalid mem object"              /* -38 */
98     , "invalid image format descriptor" /* -39 */
99     , "invalid image size"              /* -40 */
100     , "invalid sampler"                 /* -41 */
101     , "invalid binary"                  /* -42 */
102     , "invalid build options"           /* -43 */
103     , "invalid program"                 /* -44 */
104     , "invalid program executable"      /* -45 */
105     , "invalid kernel name"             /* -46 */
106     , "invalid kernel definition"       /* -47 */
107     , "invalid kernel"                  /* -48 */
108     , "invalid arg index"               /* -49 */
109     , "invalid arg value"               /* -50 */
110     , "invalid arg size"                /* -51 */
111     , "invalid kernel args"             /* -52 */
112     , "invalid work dimension"          /* -53 */
113     , "invalid work group size"         /* -54 */
114     , "invalid work item size"          /* -55 */
115     , "invalid global offset"           /* -56 */
116     , "invalid event wait list"         /* -57 */
117     , "invalid event"                   /* -58 */
118     , "invalid operation"               /* -59 */
119     , "invalid gl object"               /* -60 */
120     , "invalid buffer size"             /* -61 */
121     , "invalid mip level"               /* -62 */
122     , "invalid global work size"        /* -63 */
123   };
124 
125   static const int strings_len = sizeof(strings) / sizeof(strings[0]);
126 
127   if (-err < 0 || -err >= strings_len)
128     return "unknown error";
129 
130   return strings[-err];
131 }
132 
133 gboolean _gegl_cl_is_accelerated;
134 
135 typedef struct
136 {
137   gboolean         is_loaded;
138   gboolean         have_opengl;
139   gboolean         hard_disable;
140   gboolean         enable_profiling;
141   cl_context       ctx;
142   cl_platform_id   platform;
143   cl_device_id     device;
144   cl_command_queue cq;
145   cl_bool          image_support;
146   size_t           iter_height;
147   size_t           iter_width;
148   cl_ulong         max_mem_alloc;
149   cl_ulong         local_mem_size;
150 
151   char platform_name   [1024];
152   char platform_version[1024];
153   char platform_ext    [1024];
154   char device_name     [1024];
155 }
156 GeglClState;
157 
158 /* we made some performance measurements and OpenCL in the CPU is rarely worth it,
159  * specially now that we got our multi-threading working */
160 
161 static cl_device_type gegl_cl_default_device_type = CL_DEVICE_TYPE_GPU;
162 static GeglClState cl_state = { 0, };
163 static GHashTable *cl_program_hash = NULL;
164 
165 
166 gboolean
gegl_cl_has_gl_sharing(void)167 gegl_cl_has_gl_sharing (void)
168 {
169   return cl_state.have_opengl && gegl_cl_is_accelerated ();
170 }
171 
172 void
gegl_cl_disable(void)173 gegl_cl_disable (void)
174 {
175   _gegl_cl_is_accelerated = FALSE;
176 
177   gegl_buffer_ext_flush = NULL;
178   gegl_buffer_ext_invalidate = NULL;
179   gegl_tile_handler_cache_ext_flush = NULL;
180 }
181 
182 void
gegl_cl_hard_disable(void)183 gegl_cl_hard_disable (void)
184 {
185   cl_state.hard_disable = TRUE;
186   _gegl_cl_is_accelerated = FALSE;
187 
188   gegl_buffer_ext_flush = NULL;
189   gegl_buffer_ext_invalidate = NULL;
190   gegl_tile_handler_cache_ext_flush = NULL;
191 }
192 
193 cl_platform_id
gegl_cl_get_platform(void)194 gegl_cl_get_platform (void)
195 {
196   return cl_state.platform;
197 }
198 
199 cl_device_id
gegl_cl_get_device(void)200 gegl_cl_get_device (void)
201 {
202   return cl_state.device;
203 }
204 
205 cl_context
gegl_cl_get_context(void)206 gegl_cl_get_context (void)
207 {
208   return cl_state.ctx;
209 }
210 
211 cl_command_queue
gegl_cl_get_command_queue(void)212 gegl_cl_get_command_queue (void)
213 {
214   return cl_state.cq;
215 }
216 
217 cl_ulong
gegl_cl_get_local_mem_size(void)218 gegl_cl_get_local_mem_size (void)
219 {
220   return cl_state.local_mem_size;
221 }
222 
223 size_t
gegl_cl_get_iter_width(void)224 gegl_cl_get_iter_width (void)
225 {
226   return cl_state.iter_width;
227 }
228 
229 size_t
gegl_cl_get_iter_height(void)230 gegl_cl_get_iter_height (void)
231 {
232   return cl_state.iter_height;
233 }
234 
235 void
gegl_cl_set_profiling(gboolean enable)236 gegl_cl_set_profiling (gboolean enable)
237 {
238   g_return_if_fail (!cl_state.is_loaded);
239 
240   cl_state.enable_profiling = enable;
241 }
242 
243 void
gegl_cl_set_default_device_type(cl_device_type default_device_type)244 gegl_cl_set_default_device_type (cl_device_type default_device_type)
245 {
246   g_return_if_fail (!cl_state.is_loaded);
247 
248   gegl_cl_default_device_type = default_device_type;
249 }
250 
251 static gboolean
gegl_cl_device_has_extension(cl_device_id device,const char * extension_name)252 gegl_cl_device_has_extension (cl_device_id device, const char *extension_name)
253 {
254   cl_int     cl_err;
255   size_t     string_len = 0;
256   gchar     *device_ext_string = NULL;
257   gchar    **extensions;
258   gboolean   found = FALSE;
259 
260   if (!extension_name)
261     return FALSE;
262 
263   cl_err= gegl_clGetDeviceInfo (device, CL_DEVICE_EXTENSIONS,
264                                 0, NULL, &string_len);
265   CL_CHECK_ONLY (cl_err);
266 
267   if (!string_len)
268     return FALSE;
269 
270   device_ext_string = g_malloc0 (string_len);
271 
272   cl_err = gegl_clGetDeviceInfo (device, CL_DEVICE_EXTENSIONS,
273                                  string_len, device_ext_string, NULL);
274   CL_CHECK_ONLY (cl_err);
275 
276   extensions = g_strsplit (device_ext_string, " ", 0);
277 
278   for (gint i = 0; extensions[i] && !found; ++i)
279     {
280       if (!strcmp (extensions[i], extension_name))
281         found = TRUE;
282     }
283 
284   g_free (device_ext_string);
285   g_strfreev (extensions);
286 
287   return found;
288 }
289 
290 gboolean
gegl_cl_has_extension(const char * extension_name)291 gegl_cl_has_extension (const char *extension_name)
292 {
293   if (!gegl_cl_is_accelerated () || !extension_name)
294     return FALSE;
295 
296   return gegl_cl_device_has_extension (cl_state.device, extension_name);
297 }
298 
299 #ifdef G_OS_WIN32
300 
301 #include <windows.h>
302 
303 #define CL_LOAD_FUNCTION(func)                                                    \
304 if ((gegl_##func = (t_##func) GetProcAddress(module, #func)) == NULL)             \
305   {                                                                               \
306     g_set_error (error, GEGL_OPENCL_ERROR, 0, "symbol gegl_##func is NULL");      \
307     FreeLibrary(module);                                                          \
308     return FALSE;                                                                 \
309   }
310 
311 #else
312 
313 #ifdef __APPLE__
314 #define GL_LIBRARY_NAME "/System/Library/Frameworks/OpenGL.framework/Versions/Current/OpenGL"
315 #define CL_LIBRARY_NAME "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL"
316 #else
317 #define GL_LIBRARY_NAME "libGL.so.1"
318 #define CL_LIBRARY_NAME "libOpenCL.so.1"
319 #endif
320 
321 #define CL_LOAD_FUNCTION(func)                                                    \
322 if (!g_module_symbol (module, #func, (gpointer *)& gegl_##func))                  \
323   {                                                                               \
324     GEGL_NOTE (GEGL_DEBUG_OPENCL, "%s: %s", CL_LIBRARY_NAME, g_module_error ());  \
325     g_set_error (error, GEGL_OPENCL_ERROR, 0, "%s: %s", CL_LIBRARY_NAME, g_module_error ()); \
326     if (!g_module_close (module))                                                 \
327       g_warning ("%s: %s", CL_LIBRARY_NAME, g_module_error ());                   \
328     return FALSE;                                                                 \
329   }                                                                               \
330 if (gegl_##func == NULL)                                                          \
331   {                                                                               \
332     GEGL_NOTE (GEGL_DEBUG_OPENCL, "symbol gegl_##func is NULL");                  \
333     g_set_error (error, GEGL_OPENCL_ERROR, 0, "symbol gegl_##func is NULL");      \
334     if (!g_module_close (module))                                                 \
335       g_warning ("%s: %s", CL_LIBRARY_NAME, g_module_error ());                   \
336     return FALSE;                                                                 \
337   }
338 
339 #endif
340 
341 #define CL_LOAD_EXTENSION_FUNCTION(func)                                          \
342 g_assert(gegl_clGetExtensionFunctionAddress);                                     \
343 gegl_##func = gegl_clGetExtensionFunctionAddress(#func);                          \
344 if (gegl_##func == NULL)                                                          \
345   {                                                                               \
346     GEGL_NOTE (GEGL_DEBUG_OPENCL, "symbol gegl_##func is NULL");                  \
347     g_set_error (error, GEGL_OPENCL_ERROR, 0, "symbol gegl_##func is NULL");      \
348     return FALSE;                                                                 \
349   }
350 
351 #if defined(__APPLE__)
352 typedef struct _CGLContextObject *CGLContextObj;
353 typedef struct CGLShareGroupRec  *CGLShareGroupObj;
354 
355 typedef CGLContextObj (*t_CGLGetCurrentContext) (void);
356 typedef CGLShareGroupObj (*t_CGLGetShareGroup) (CGLContextObj);
357 
358 t_CGLGetCurrentContext gegl_CGLGetCurrentContext;
359 t_CGLGetShareGroup gegl_CGLGetShareGroup;
360 
361 /* FIXME: Move this to cl_gl_ext.h */
362 #define CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE        0x10000000
363 #elif defined(G_OS_WIN32)
364 /* pass */
365 #else
366 typedef struct _XDisplay Display;
367 typedef struct __GLXcontextRec *GLXContext;
368 
369 
370 typedef GLXContext (*t_glXGetCurrentContext) (void);
371 typedef Display * (*t_glXGetCurrentDisplay) (void);
372 
373 t_glXGetCurrentContext gegl_glXGetCurrentContext;
374 t_glXGetCurrentDisplay gegl_glXGetCurrentDisplay;
375 #endif
376 
377 static gboolean
gegl_cl_init_get_gl_sharing_props(cl_context_properties gl_contex_props[64],GError ** error)378 gegl_cl_init_get_gl_sharing_props (cl_context_properties   gl_contex_props[64],
379                                    GError                **error)
380 {
381   static gboolean gl_loaded = FALSE;
382 
383   #if defined(__APPLE__)
384   CGLContextObj kCGLContext;
385   CGLShareGroupObj kCGLShareGroup;
386 
387   if (!gl_loaded)
388     {
389       GModule *module = g_module_open (GL_LIBRARY_NAME, G_MODULE_BIND_LAZY);
390 
391       if (!g_module_symbol (module, "CGLGetCurrentContext", (gpointer *)&gegl_CGLGetCurrentContext))
392         printf ("Failed to load CGLGetCurrentContext");
393       if (!g_module_symbol (module, "CGLGetShareGroup", (gpointer *)&gegl_CGLGetShareGroup))
394         printf ("Failed to load CGLGetShareGroup");
395 
396       gl_loaded = TRUE;
397     }
398 
399   kCGLContext = gegl_CGLGetCurrentContext ();
400   kCGLShareGroup = gegl_CGLGetShareGroup (kCGLContext);
401 
402   gl_contex_props[0] = CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE;
403   gl_contex_props[1] = (cl_context_properties)kCGLShareGroup;
404   gl_contex_props[2] = 0;
405   return TRUE;
406 
407   #elif defined(G_OS_WIN32)
408 
409   GEGL_NOTE (GEGL_DEBUG_OPENCL, "GL sharing not supported on WIN32");
410   g_set_error (error, GEGL_OPENCL_ERROR, 0, "GL sharing not supported on WIN32");
411 
412   return FALSE;
413 
414   #else /* Some kind of unix */
415   GLXContext  context;
416   Display    *display;
417 
418   if (!gl_loaded)
419     {
420       GModule *module = g_module_open (GL_LIBRARY_NAME, G_MODULE_BIND_LAZY);
421 
422       if (!g_module_symbol (module, "glXGetCurrentContext", (gpointer *)&gegl_glXGetCurrentContext))
423         printf ("Failed to load glXGetCurrentContext");
424       if (!g_module_symbol (module, "glXGetCurrentDisplay", (gpointer *)&gegl_glXGetCurrentDisplay))
425         printf ("Failed to load glXGetCurrentDisplay");
426 
427       gl_loaded = TRUE;
428     }
429 
430   context = gegl_glXGetCurrentContext();
431   display = gegl_glXGetCurrentDisplay();
432   if (!context || !display)
433     {
434       GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not get a valid OpenGL context");
435       g_set_error (error, GEGL_OPENCL_ERROR, 0, "Could not get a valid OpenGL context");
436       return FALSE;
437     }
438 
439   gl_contex_props[0] = CL_GL_CONTEXT_KHR;
440   gl_contex_props[1] = (cl_context_properties)context;
441   gl_contex_props[2] = CL_GLX_DISPLAY_KHR;
442   gl_contex_props[3] = (cl_context_properties)display;
443   gl_contex_props[4] = 0;
444   return TRUE;
445 
446   #endif
447 }
448 
449 static gboolean
450 gegl_cl_init_common (cl_device_type          requested_device_type,
451                      gboolean                gl_sharing,
452                      GError                **error);
453 
454 gboolean
gegl_cl_init_with_opengl(GError ** error)455 gegl_cl_init_with_opengl  (GError **error)
456 {
457   return gegl_cl_init_common (gegl_cl_default_device_type, TRUE, error);
458 }
459 
460 gboolean
gegl_cl_init(GError ** error)461 gegl_cl_init (GError **error)
462 {
463   return gegl_cl_init_common (gegl_cl_default_device_type, FALSE, error);
464 }
465 
466 static gboolean
gegl_cl_init_load_functions(GError ** error)467 gegl_cl_init_load_functions (GError **error)
468 {
469 #ifdef G_OS_WIN32
470   HINSTANCE module = LoadLibrary ("OpenCL.dll");
471 #else
472   GModule *module = g_module_open (CL_LIBRARY_NAME, G_MODULE_BIND_LAZY);
473 #endif
474 
475   if (!module)
476     {
477       GEGL_NOTE (GEGL_DEBUG_OPENCL, "Unable to load OpenCL library");
478       g_set_error (error, GEGL_OPENCL_ERROR, 0, "Unable to load OpenCL library");
479       return FALSE;
480     }
481 
482   CL_LOAD_FUNCTION (clGetPlatformIDs)
483   CL_LOAD_FUNCTION (clGetPlatformInfo)
484   CL_LOAD_FUNCTION (clGetDeviceIDs)
485   CL_LOAD_FUNCTION (clGetDeviceInfo)
486 
487   CL_LOAD_FUNCTION (clCreateContext)
488   CL_LOAD_FUNCTION (clCreateContextFromType)
489   CL_LOAD_FUNCTION (clCreateCommandQueue)
490   CL_LOAD_FUNCTION (clCreateProgramWithSource)
491   CL_LOAD_FUNCTION (clBuildProgram)
492   CL_LOAD_FUNCTION (clGetProgramBuildInfo)
493 
494   CL_LOAD_FUNCTION (clCreateKernel)
495   CL_LOAD_FUNCTION (clSetKernelArg)
496   CL_LOAD_FUNCTION (clGetKernelWorkGroupInfo)
497   CL_LOAD_FUNCTION (clCreateBuffer)
498   CL_LOAD_FUNCTION (clEnqueueWriteBuffer)
499   CL_LOAD_FUNCTION (clEnqueueReadBuffer)
500   CL_LOAD_FUNCTION (clEnqueueCopyBuffer)
501   CL_LOAD_FUNCTION (clEnqueueReadBufferRect)
502   CL_LOAD_FUNCTION (clEnqueueWriteBufferRect)
503   CL_LOAD_FUNCTION (clEnqueueCopyBufferRect)
504   CL_LOAD_FUNCTION (clCreateImage2D)
505   CL_LOAD_FUNCTION (clCreateImage3D)
506   CL_LOAD_FUNCTION (clEnqueueReadImage)
507   CL_LOAD_FUNCTION (clEnqueueWriteImage)
508   CL_LOAD_FUNCTION (clEnqueueCopyImage)
509   CL_LOAD_FUNCTION (clEnqueueCopyImageToBuffer)
510   CL_LOAD_FUNCTION (clEnqueueCopyBufferToImage)
511 
512   CL_LOAD_FUNCTION (clEnqueueMapBuffer)
513   CL_LOAD_FUNCTION (clEnqueueMapImage)
514   CL_LOAD_FUNCTION (clEnqueueUnmapMemObject)
515 
516   CL_LOAD_FUNCTION (clEnqueueNDRangeKernel)
517   CL_LOAD_FUNCTION (clEnqueueBarrier)
518   CL_LOAD_FUNCTION (clFinish)
519 
520   CL_LOAD_FUNCTION (clGetEventProfilingInfo)
521 
522   CL_LOAD_FUNCTION (clReleaseKernel)
523   CL_LOAD_FUNCTION (clReleaseProgram)
524   CL_LOAD_FUNCTION (clReleaseCommandQueue)
525   CL_LOAD_FUNCTION (clReleaseContext)
526   CL_LOAD_FUNCTION (clReleaseMemObject)
527 
528   CL_LOAD_FUNCTION (clGetExtensionFunctionAddress);
529 
530   return TRUE;
531 }
532 
533 static gboolean
gegl_cl_gl_init_load_functions(GError ** error)534 gegl_cl_gl_init_load_functions (GError **error)
535 {
536   CL_LOAD_EXTENSION_FUNCTION (clCreateFromGLTexture2D)
537   CL_LOAD_EXTENSION_FUNCTION (clEnqueueAcquireGLObjects)
538   CL_LOAD_EXTENSION_FUNCTION (clEnqueueReleaseGLObjects)
539 
540   return TRUE;
541 }
542 
543 static gboolean
gegl_cl_init_load_device_info(cl_platform_id platform,cl_device_id device,cl_device_type requested_device_type,GError ** error)544 gegl_cl_init_load_device_info (cl_platform_id   platform,
545                                cl_device_id     device,
546                                cl_device_type   requested_device_type,
547                                GError         **error)
548 {
549   cl_int err = CL_SUCCESS;
550 
551   if (device)
552     {
553       /* Get platform from device */
554       err = gegl_clGetDeviceInfo (device, CL_DEVICE_PLATFORM, sizeof (cl_platform_id), &platform, NULL);
555       if (err != CL_SUCCESS)
556         {
557           GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not create platform");
558           g_set_error (error, GEGL_OPENCL_ERROR, 0, "Could not create platform");
559           return FALSE;
560         }
561     }
562   else
563     {
564       cl_platform_id *platforms = NULL;
565       cl_uint num_platforms = 0;
566 
567       if (!requested_device_type)
568         requested_device_type = CL_DEVICE_TYPE_DEFAULT;
569 
570       err = gegl_clGetPlatformIDs (0, NULL, &num_platforms);
571       if (err != CL_SUCCESS)
572         {
573           GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not create platform");
574           g_set_error (error, GEGL_OPENCL_ERROR, 0, "Could not create platform");
575           return FALSE;
576         }
577 
578       if (platform)
579         {
580           platforms = g_new (cl_platform_id, 1);
581           num_platforms = 1;
582           platforms[0] = platform;
583         }
584       else
585         {
586           platforms = g_new (cl_platform_id, num_platforms);
587           err = gegl_clGetPlatformIDs (num_platforms, platforms, NULL);
588         }
589 
590       if (err != CL_SUCCESS)
591         {
592           GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not create platform");
593           g_set_error (error, GEGL_OPENCL_ERROR, 0, "Could not create platform");
594           g_free (platforms);
595           return FALSE;
596         }
597 
598       for (int platform_idx = 0; platform_idx < num_platforms; platform_idx++)
599         {
600           platform = platforms[platform_idx];
601           err = gegl_clGetDeviceIDs (platform, requested_device_type, 1, &device, NULL);
602           if (err == CL_SUCCESS) {
603             cl_bool tmp_image_support = FALSE;
604             err = gegl_clGetDeviceInfo (device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &tmp_image_support, NULL);
605             if (err == CL_SUCCESS && tmp_image_support == FALSE)
606               continue;
607             break;
608           }
609         }
610 
611       g_free (platforms);
612 
613       if (err != CL_SUCCESS)
614         {
615           GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not create device: %s", gegl_cl_errstring (err));
616           g_set_error (error, GEGL_OPENCL_ERROR, 0, "Could not create device: %s", gegl_cl_errstring (err));
617           return FALSE;
618         }
619     }
620 
621   cl_state.platform = platform;
622   cl_state.device = device;
623 
624   gegl_clGetPlatformInfo (platform, CL_PLATFORM_NAME,       sizeof(cl_state.platform_name),    cl_state.platform_name,    NULL);
625   gegl_clGetPlatformInfo (platform, CL_PLATFORM_VERSION,    sizeof(cl_state.platform_version), cl_state.platform_version, NULL);
626   gegl_clGetPlatformInfo (platform, CL_PLATFORM_EXTENSIONS, sizeof(cl_state.platform_ext),     cl_state.platform_ext,     NULL);
627 
628   gegl_clGetDeviceInfo (device, CL_DEVICE_NAME, sizeof(cl_state.device_name), cl_state.device_name, NULL);
629 
630   gegl_clGetDeviceInfo (device, CL_DEVICE_IMAGE_SUPPORT,      sizeof(cl_bool),  &cl_state.image_support,    NULL);
631   gegl_clGetDeviceInfo (device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &cl_state.max_mem_alloc,    NULL);
632   gegl_clGetDeviceInfo (device, CL_DEVICE_LOCAL_MEM_SIZE,     sizeof(cl_ulong), &cl_state.local_mem_size,   NULL);
633 
634   cl_state.iter_width  = 4096;
635   cl_state.iter_height = 4096;
636 
637   while (cl_state.iter_width * cl_state.iter_height * 16 > cl_state.max_mem_alloc)
638     {
639       if (cl_state.iter_height < cl_state.iter_width)
640         cl_state.iter_width  /= 2;
641       else
642         cl_state.iter_height /= 2;
643     }
644 
645   cl_state.iter_width  /= 2;
646 
647   GEGL_NOTE (GEGL_DEBUG_OPENCL, "Platform Name: %s",       cl_state.platform_name);
648   GEGL_NOTE (GEGL_DEBUG_OPENCL, "Version: %s",             cl_state.platform_version);
649   GEGL_NOTE (GEGL_DEBUG_OPENCL, "Extensions: %s",          cl_state.platform_ext);
650   GEGL_NOTE (GEGL_DEBUG_OPENCL, "Default Device Name: %s", cl_state.device_name);
651   GEGL_NOTE (GEGL_DEBUG_OPENCL, "Max Alloc: %lu bytes",    (unsigned long)cl_state.max_mem_alloc);
652   GEGL_NOTE (GEGL_DEBUG_OPENCL, "Local Mem: %lu bytes",    (unsigned long)cl_state.local_mem_size);
653   GEGL_NOTE (GEGL_DEBUG_OPENCL, "Iteration size: (%lu, %lu)",
654                                 (long unsigned int)cl_state.iter_width,
655                                 (long unsigned int)cl_state.iter_height);
656 
657   return TRUE;
658 }
659 
660 static gboolean
gegl_cl_init_common(cl_device_type requested_device_type,gboolean gl_sharing,GError ** error)661 gegl_cl_init_common (cl_device_type          requested_device_type,
662                      gboolean                gl_sharing,
663                      GError                **error)
664 {
665   cl_int err;
666 
667   if (cl_state.hard_disable)
668     {
669       GEGL_NOTE (GEGL_DEBUG_OPENCL, "OpenCL is disabled");
670       g_set_error (error, GEGL_OPENCL_ERROR, 0, "OpenCL is disabled");
671       return FALSE;
672     }
673 
674   if (!cl_state.is_loaded)
675     {
676       cl_command_queue_properties command_queue_flags = 0;
677       cl_context ctx = NULL;
678 
679       if (!gegl_cl_init_load_functions (error))
680         return FALSE;
681 
682       if (gl_sharing)
683         {
684 #ifdef __APPLE__
685           cl_device_id sharing_device;
686 #endif
687           cl_context_properties gl_contex_props[64];
688 
689           if (!gegl_cl_init_get_gl_sharing_props (gl_contex_props, error))
690             return FALSE;
691 
692 #ifdef __APPLE__
693           /* Create context */
694           ctx = gegl_clCreateContext (gl_contex_props, 0, 0, NULL, 0, &err);
695 
696           if (err != CL_SUCCESS)
697             {
698               GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not create context: %s", gegl_cl_errstring (err));
699               g_set_error (error, GEGL_OPENCL_ERROR, 0, "Could not create context: %s", gegl_cl_errstring (err));
700               return FALSE;
701             }
702 
703           /* Get device */
704           clGetContextInfo (ctx, CL_CONTEXT_DEVICES, sizeof(cl_device_id), &sharing_device, NULL);
705 
706           if (err != CL_SUCCESS)
707             {
708               clReleaseContext (ctx);
709               GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not get context's device: %s", gegl_cl_errstring (err));
710               g_set_error (error, GEGL_OPENCL_ERROR, 0, "Could not get context's device: %s", gegl_cl_errstring (err));
711               return FALSE;
712             }
713 
714           if (!gegl_cl_init_load_device_info (NULL, sharing_device, 0, error))
715             {
716               clReleaseContext (ctx);
717               return FALSE;
718             }
719 #else
720           /* Get default GPU device */
721           if (!gegl_cl_init_load_device_info (NULL, NULL, CL_DEVICE_TYPE_GPU, error))
722             return FALSE;
723 
724           if (!gegl_cl_device_has_extension (cl_state.device, "cl_khr_gl_sharing"))
725             {
726               GEGL_NOTE (GEGL_DEBUG_OPENCL, "Device does not support cl_khr_gl_sharing");
727               g_set_error (error, GEGL_OPENCL_ERROR, 0, "Device does not support cl_khr_gl_sharing");
728               return FALSE;
729             }
730 
731           /* Load extension functions */
732           if (!gegl_cl_gl_init_load_functions (error))
733             return FALSE;
734 
735           /* Create context */
736           ctx = gegl_clCreateContext (gl_contex_props, 1, &cl_state.device, NULL, NULL, &err);
737 
738           if (err != CL_SUCCESS)
739             {
740               GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not create context: %s", gegl_cl_errstring (err));
741               g_set_error (error, GEGL_OPENCL_ERROR, 0, "Could not create context: %s", gegl_cl_errstring (err));
742               return FALSE;
743             }
744 #endif
745         }
746       else
747         {
748           if (!gegl_cl_init_load_device_info (NULL, NULL, requested_device_type, error))
749             return FALSE;
750           ctx = gegl_clCreateContext (NULL, 1, &cl_state.device, NULL, NULL, &err);
751         }
752 
753       if (cl_state.image_support)
754         {
755           GEGL_NOTE (GEGL_DEBUG_OPENCL, "Image Support OK");
756         }
757       else
758         {
759           if (ctx)
760             gegl_clReleaseContext (ctx);
761 
762           GEGL_NOTE (GEGL_DEBUG_OPENCL, "Image Support Error");
763           g_set_error (error, GEGL_OPENCL_ERROR, 0, "Image Support Error");
764           return FALSE;
765         }
766 
767       cl_state.ctx = ctx;
768 
769       command_queue_flags = 0;
770       if (cl_state.enable_profiling)
771         command_queue_flags |= CL_QUEUE_PROFILING_ENABLE;
772 
773       cl_state.cq = gegl_clCreateCommandQueue (cl_state.ctx, cl_state.device, command_queue_flags, &err);
774 
775       if (err != CL_SUCCESS)
776         {
777           GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not create command queue");
778           g_set_error (error, GEGL_OPENCL_ERROR, 0, "Could not create command queue");
779           return FALSE;
780         }
781 
782       if (gl_sharing)
783         cl_state.have_opengl = TRUE;
784       _gegl_cl_is_accelerated = TRUE;
785       cl_state.is_loaded = TRUE;
786 
787       /* XXX: this dict is being leaked */
788       cl_program_hash = g_hash_table_new (g_str_hash, g_str_equal);
789 
790       gegl_cl_color_compile_kernels ();
791 
792       GEGL_NOTE (GEGL_DEBUG_OPENCL, "OK");
793     }
794 
795   if (cl_state.is_loaded)
796     _gegl_cl_is_accelerated = TRUE;
797 
798   {
799     gegl_buffer_ext_flush = (void*)gegl_buffer_cl_cache_flush;
800     gegl_buffer_ext_invalidate = (void*)gegl_buffer_cl_cache_invalidate;
801     gegl_tile_handler_cache_ext_flush = (void*)gegl_buffer_cl_cache_flush2;
802   }
803 
804   return TRUE;
805 }
806 
807 #undef CL_LOAD_FUNCTION
808 
809 /* XXX: same program_source with different kernel_name[], context or device
810  *      will retrieve the same key
811  */
812 GeglClRunData *
gegl_cl_compile_and_build(const char * program_source,const char * kernel_name[])813 gegl_cl_compile_and_build (const char *program_source, const char *kernel_name[])
814 {
815   gint errcode;
816   GeglClRunData *cl_data = NULL;
817   if (!gegl_cl_is_accelerated ())
818     return NULL;
819 
820   cl_data = (GeglClRunData *)g_hash_table_lookup (cl_program_hash, program_source);
821 
822   if (cl_data == NULL)
823     {
824       const size_t lengths[] = {strlen(random_cl_source), strlen(program_source)};
825       const char *sources[] = {random_cl_source, program_source};
826 
827       gint    i;
828       char   *msg;
829       size_t  s = 0;
830       cl_int  build_errcode;
831       guint   kernel_n = 0;
832 
833       while (kernel_name[++kernel_n] != NULL);
834 
835       cl_data = (GeglClRunData *) g_new (GeglClRunData, 1);
836 
837       cl_data->program = gegl_clCreateProgramWithSource (gegl_cl_get_context (), 2, sources,
838                                                          lengths, &errcode);
839       CL_CHECK_ONLY (errcode);
840 
841       build_errcode = gegl_clBuildProgram (cl_data->program, 0, NULL, NULL, NULL, NULL);
842 
843       errcode = gegl_clGetProgramBuildInfo (cl_data->program,
844                                             gegl_cl_get_device (),
845                                             CL_PROGRAM_BUILD_LOG,
846                                             0, NULL, &s);
847       CL_CHECK_ONLY (errcode);
848 
849       if (s)
850         {
851           msg = g_malloc (s);
852           errcode = gegl_clGetProgramBuildInfo (cl_data->program,
853                                                 gegl_cl_get_device (),
854                                                 CL_PROGRAM_BUILD_LOG,
855                                                 s, msg, NULL);
856           CL_CHECK_ONLY (errcode);
857         }
858       else
859         {
860           msg = strdup ("");
861         }
862 
863       if (build_errcode != CL_SUCCESS)
864         {
865           GEGL_NOTE (GEGL_DEBUG_OPENCL, "Build Error: %s\n%s",
866                                         gegl_cl_errstring (build_errcode),
867                                         msg);
868           g_warning ("%s\n%s\n", gegl_cl_errstring (build_errcode), msg);
869           g_free (msg);
870           return NULL;
871         }
872       else
873         {
874           g_strchug (msg);
875           if (strlen (msg))
876             GEGL_NOTE (GEGL_DEBUG_OPENCL, "Compiling successful\n%s", msg);
877           else
878             GEGL_NOTE (GEGL_DEBUG_OPENCL, "Compiling successful");
879           g_free (msg);
880         }
881 
882       cl_data->kernel = g_new (cl_kernel, kernel_n);
883       cl_data->work_group_size = g_new (size_t, kernel_n);
884 
885       for (i = 0; i < kernel_n; i++)
886         {
887           cl_data->kernel[i] = gegl_clCreateKernel (cl_data->program,
888                                                     kernel_name[i],
889                                                     &errcode);
890           CL_CHECK_ONLY (errcode);
891 
892           errcode = gegl_clGetKernelWorkGroupInfo (cl_data->kernel[i],
893                                                    gegl_cl_get_device (),
894                                                    CL_KERNEL_WORK_GROUP_SIZE,
895                                                    sizeof(size_t),
896                                                    &cl_data->work_group_size[i],
897                                                    NULL);
898           CL_CHECK_ONLY (errcode);
899         }
900 
901       g_hash_table_insert (cl_program_hash, g_strdup (program_source), (void*)cl_data);
902     }
903 
904   return cl_data;
905 }
906 
907 void
gegl_cl_cleanup(void)908 gegl_cl_cleanup (void)
909 {
910   cl_int err;
911   err = gegl_cl_random_cleanup ();
912   if (err != CL_SUCCESS)
913     GEGL_NOTE (GEGL_DEBUG_OPENCL, "Could not free cl_random_data: %s", gegl_cl_errstring (err));
914 }
915