1 /****** BEGIN COPYRIGHT *******************************************************
2  *
3  * Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>
4  *
5  * This software is licensed as described in the file COPYRIGHT, which
6  * you should have received as part of this distribution. The terms
7  * are also available at http://www.rogvall.se/docs/copyright.txt.
8  *
9  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
10  * copies of the Software, and permit persons to whom the Software is
11  * furnished to do so, under the terms of the COPYRIGHT file.
12  *
13  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
14  * KIND, either express or implied.
15  *
16  ****** END COPYRIGHT ********************************************************/
17 //
18 // NIF interface for OpenCL binding
19 //
20 
21 #include <stdio.h>
22 
23 #ifndef WIN32
24 #include <stdbool.h>
25 #include <stdlib.h>
26 #include <stdarg.h>
27 #include <stdint.h>
28 #include <string.h>
29 #include <errno.h>
30 #include <dlfcn.h>
31 #else
32 #include <windows.h>
33 #endif
34 
35 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS 1
36 
37 #ifdef DARWIN
38 #include <OpenCL/opencl.h>
39 #else
40 #include <CL/cl.h>
41 #include <CL/cl_ext.h>
42 #endif
43 
44 // Old cl_platform doesn't have the CL_CALLBACK
45 #ifndef CL_CALLBACK
46 #define CL_CALLBACK
47 #endif
48 
49 #ifdef WIN32
50 typedef cl_bool bool;
51 #define true 1
52 #define false 0
53 #endif
54 
55 
56 #ifdef WIN_X64
57 #define ecl_get_sizet(a1,a2,a3) enif_get_uint64(a1,a2,a3)
58 #define ecl_make_sizet(a1,a2) enif_make_uint64(a1,a2)
59 #else
60 #define ecl_get_sizet(a1,a2,a3) enif_get_ulong(a1,a2,(unsigned long*)a3)
61 #define ecl_make_sizet(a1,a2) enif_make_ulong(a1,a2)
62 #endif
63 
64 #define UNUSED(a) ((void) a)
65 
66 #include "erl_nif.h"
67 #include "cl_hash.h"
68 
69 #define sizeof_array(a) (sizeof(a) / sizeof(a[0]))
70 
71 // #define DEBUG
72 
73 #ifdef DEBUG
74 #include <stdarg.h>
75 static void ecl_emit_error(char* file, int line, ...);
76 #define DBG(...) ecl_emit_error(__FILE__,__LINE__,__VA_ARGS__)
77 #else
78 #define DBG(...)
79 #endif
80 
81 #define CL_ERROR(...) ecl_emit_error(__FILE__,__LINE__,__VA_ARGS__)
82 
83 // soft limits
84 #define MAX_INFO_SIZE   1024
85 #define MAX_DEVICES     128
86 #define MAX_PLATFORMS   128
87 #define MAX_OPTION_LIST 1024
88 #define MAX_KERNEL_NAME 1024
89 #define MAX_KERNELS     1024
90 #define MAX_SOURCES     128
91 #define MAX_WAIT_LIST   128
92 #define MAX_WORK_SIZE   3
93 #define MAX_IMAGE_FORMATS 128
94 #define MAX_MEM_OBJECTS 128
95 
96 // Atom macros
97 #define ATOM(name) atm_##name
98 
99 #define DECL_ATOM(name) \
100     ERL_NIF_TERM atm_##name = 0
101 
102 // require env in context (ugly)
103 #define LOAD_ATOM(name)			\
104     atm_##name = enif_make_atom(env,#name)
105 
106 #define LOAD_ATOM_STRING(name,string)			\
107     atm_##name = enif_make_atom(env,string)
108 
109 // Wrapper to handle reource atom name etc.
110 typedef struct {
111     char* name;
112     ERL_NIF_TERM type;         // resource atom name
113     ErlNifResourceType* res;   // the resource type
114     size_t              size;  // "real" object size
115 } ecl_resource_t;
116 
117 struct _ecl_object_t;
118 
119 typedef struct _ecl_platform_t {
120     struct _ecl_object_t* o_platform;
121     cl_uint ndevices;
122     struct _ecl_object_t** o_device;
123 } ecl_platform_t;
124 
125 struct _ecl_env_t;
126 
127 typedef struct _ecl_object_t {
128     lhash_bucket_t        hbucket;   // inheritance: map: cl->ecl
129     struct _ecl_env_t*    env;
130     cl_int                version;
131     struct _ecl_object_t* parent;     // parent resource object
132     union {
133 	cl_platform_id   platform;
134 	cl_device_id     device;
135 	cl_context       context;
136 	cl_command_queue queue;
137 	cl_mem           mem;
138 	cl_sampler       sampler;
139 	cl_program       program;
140 	cl_kernel        kernel;
141 	cl_event         event;
142 	void*            opaque;
143     };
144 } ecl_object_t;
145 
146 // "inherits" ecl_object_t and add special binary objects (read/write)
147 typedef struct _ecl_event_t {
148     ecl_object_t obj;       // FIXED place for inhertiance
149     bool          rd;       // Read binary operation
150     bool          rl;       // Do not release if true
151     ErlNifEnv*    bin_env;  // environment to hold binary term data
152     ErlNifBinary* bin;      // read/write data
153 } ecl_event_t;
154 
155 #define KERNEL_ARG_OTHER   0
156 #define KERNEL_ARG_MEM     1
157 #define KERNEL_ARG_SAMPLER 2
158 
159 // This is a special construct inorder to kee
160 typedef struct {
161     int type;    // 0=other, 1=mem, 2=samper
162     union {
163 	cl_mem      mem;
164 	cl_sampler  sampler;
165 	void*       other;
166 	void*       value;
167     };
168 } ecl_kernel_arg_t;
169 
170 // "inherits" ecl_object_t and reference count kernel args
171 typedef struct _ecl_kernel_t {
172     ecl_object_t      obj;       // FIXED place for inhertiance
173     cl_uint           num_args;  // number of arguments used by the kernel
174     ecl_kernel_arg_t* arg;       // array of current args
175 } ecl_kernel_t;
176 
177 
178 typedef enum {
179     OCL_CHAR,          // cl_char
180     OCL_UCHAR,         // cl_uchar
181     OCL_SHORT,         // cl_short
182     OCL_USHORT,        // cl_ushort
183     OCL_INT,           // cl_int
184     OCL_UINT,          // cl_uint
185     OCL_LONG,          // cl_long
186     OCL_ULONG,         // cl_ulong
187     OCL_HALF,          // cl_half
188     OCL_FLOAT,         // cl_float
189     OCL_DOUBLE,        // cl_double
190     OCL_BOOL,          // cl_bool
191     OCL_STRING,        // cl_char*
192     OCL_BITFIELD,      // cl_ulong
193     OCL_ENUM,          // cl_int
194     OCL_POINTER,       // void*
195     OCL_SIZE,          // size_t
196     OCL_PLATFORM,      // void*
197     OCL_DEVICE,        // void*
198     OCL_CONTEXT,       // void*
199     OCL_PROGRAM,       // void*
200     OCL_COMMAND_QUEUE, // void*
201     OCL_IMAGE_FORMAT,   // cl_image_format
202 #if CL_VERSION_1_2 == 1
203     OCL_DEVICE_PARTITION, // cl_device_partition_property
204 #endif
205     OCL_NUM_TYPES
206 } ocl_type_t;
207 
208 #define OCL_DEVICE_TYPE                  OCL_BITFIELD
209 #define OCL_DEVICE_FP_CONFIG             OCL_BITFIELD
210 #define OCL_DEVICE_GLOBAL_MEM_CACHE_TYPE OCL_ENUM
211 #define OCL_PLATFORM_INFO                OCL_UINT
212 #define OCL_DEVICE_INFO                  OCL_UINT
213 #define OCL_DEVICE_EXEC_CAPABILITIES     OCL_BITFIELD
214 #define OCL_QUEUE_PROPERTIES             OCL_BITFIELD
215 #define OCL_DEVICE_LOCAL_MEM_TYPE        OCL_ENUM
216 #define OCL_MEM_OBJECT_TYPE              OCL_ENUM
217 #define OCL_MEM_FLAGS                    OCL_BITFIELD
218 #define OCL_SAMPLER_ADDRESSING_MODE      OCL_ENUM
219 #define OCL_SAMPLER_FILTER_MODE          OCL_ENUM
220 #define OCL_BUILD_STATUS                 OCL_ENUM
221 #define OCL_DEVICE_DOUBLE_FP_CONFIG      OCL_BITFIELD
222 #define OCL_PROGRAM_BINARY_TYPE          OCL_ENUM
223 
224 typedef struct {
225     ERL_NIF_TERM*  key;
226     ErlNifUInt64   value;
227 } ecl_kv_t;
228 
229 typedef struct {
230     ERL_NIF_TERM*  info_key;    // Atom
231     cl_uint        info_id;     // Information
232     bool           is_array;    // return type is a vector of data
233     ocl_type_t     info_type;   // info data type
234     void*          extern_info; // Encode/Decode enum/bitfields
235     size_t         def_size;    // Def size in bytes (if == 0 query driver)
236 } ecl_info_t;
237 
238 typedef enum {
239     ECL_MESSAGE_STOP,           // time to die
240     ECL_MESSAGE_UPGRADE,        // time to upgrade
241     ECL_MESSAGE_SYNC,           // synk
242     ECL_MESSAGE_SYNC_ACK,       // synk return message
243     ECL_MESSAGE_FLUSH,          // call clFlush
244     ECL_MESSAGE_FINISH,         // call clFinish
245     ECL_MESSAGE_WAIT_FOR_EVENT  // call clWaitForEvents (only one event!)
246 } ecl_message_type_t;
247 
248 struct _ecl_thread_t;
249 
250 typedef struct ecl_message_t
251 {
252     ecl_message_type_t type;
253     ErlNifPid        sender;  // sender pid
254     ErlNifEnv*          env;  // message environment (ref, bin's etc)
255     ERL_NIF_TERM        ref;  // ref (in env!)
256     union {
257 	ecl_object_t* queue;  // ECL_MESSAGE_FLUSH/ECL_MESSAGE_FINISH
258 	ecl_event_t* event;   // ECL_MESSAGE_WAIT_FOR_EVENT
259 	void* (*upgrade)(void*); // ECL_MESSAGE_UPGRADE
260     };
261 } ecl_message_t;
262 
263 typedef struct _ecl_qlink_t {
264     struct _ecl_qlink_t* next;
265     ecl_message_t mesg;
266 } ecl_qlink_t;
267 
268 #define MAX_QLINK  8  // pre-allocated qlinks
269 
270 typedef struct {
271     ErlNifMutex*   mtx;
272     ErlNifCond*    cv;
273     int len;
274     ecl_qlink_t*   front;   // pick from front
275     ecl_qlink_t*   rear;    // insert at rear
276     ecl_qlink_t*   free;    // free list in ql
277     ecl_qlink_t  ql[MAX_QLINK];  // "pre" allocated qlinks
278 } ecl_queue_t;
279 
280 typedef struct _ecl_thread_t {
281     ErlNifTid   tid;     // thread id
282     ecl_queue_t q;       // message queue
283     void*       arg;     // thread init argument
284 } ecl_thread_t;
285 
286 // "inherits" ecl_object_t and add keep track of the context thread
287 typedef struct _ecl_context_t {
288     ecl_object_t obj;             // FIXED place for inhertiance
289     struct _ecl_context_t* next;  // next context in list
290     ecl_thread_t* thr;            // The context thread
291     int upgrade_count;            // upgrade tick
292 } ecl_context_t;
293 
294 typedef struct _ecl_env_t {
295     int         ref_count;  // ref count the load/upgrade/unload
296     lhash_t     ref;        // cl -> ecl
297     ErlNifRWLock* ref_lock; // lhash operation lock
298     ecl_queue_t q;          // sync queue
299     cl_uint nplatforms;
300     ecl_platform_t* platform;
301     ErlNifRWLock* context_list_lock;
302     ecl_context_t*  context_list;
303     cl_int icd_version;
304 } ecl_env_t;
305 
306 typedef struct _ecl_func_t {
307     char* name;
308     void* func;
309     int   version;  // 10,11,12,20...
310 } ecl_func_t;
311 
312 //
313 // ECL_FUNC function list is used multiple times by
314 // updating the meaning of ECL_FUNC
315 //
316 #define ECL_FUNC_LIST		     \
317     ECL_FUNC(clGetPlatformIDs,10),		\
318 	ECL_FUNC(clGetPlatformInfo,10),	\
319 	ECL_FUNC(clGetDeviceIDs,10),		\
320 	ECL_FUNC(clGetDeviceInfo,10),		\
321 	ECL_FUNC(clCreateSubDevices,12),	\
322 	ECL_FUNC(clRetainDevice,12),		\
323 	ECL_FUNC(clReleaseDevice,12),		\
324 	ECL_FUNC(clCreateContext,10),		\
325 	ECL_FUNC(clCreateContextFromType,10),	\
326 	ECL_FUNC(clRetainContext,10),		\
327 	ECL_FUNC(clReleaseContext,10),		\
328 	ECL_FUNC(clGetContextInfo,10),		\
329 	ECL_FUNC(clCreateCommandQueue,10),	\
330 	ECL_FUNC(clRetainCommandQueue,10),	\
331 	ECL_FUNC(clReleaseCommandQueue,10),	\
332 	ECL_FUNC(clGetCommandQueueInfo,10),	\
333 	ECL_FUNC(clCreateBuffer,10),		\
334 	ECL_FUNC(clCreateSubBuffer,11),	\
335 	ECL_FUNC(clCreateImage,12),		\
336 	ECL_FUNC(clCreatePipe,20),		\
337 	ECL_FUNC(clRetainMemObject,10),	\
338 	ECL_FUNC(clReleaseMemObject,10),     \
339 	ECL_FUNC(clGetSupportedImageFormats,10),	\
340 	ECL_FUNC(clGetMemObjectInfo,10),		\
341 	ECL_FUNC(clGetImageInfo,10),			\
342 	ECL_FUNC(clGetPipeInfo,20),			\
343 	ECL_FUNC(clSetMemObjectDestructorCallback,11),	\
344 	ECL_FUNC(clSVMAlloc,20),		   \
345 	ECL_FUNC(clSVMFree,20),			   \
346 	ECL_FUNC(clCreateSampler,10),		   \
347 	ECL_FUNC(clCreateSamplerWithProperties,20),	\
348 	ECL_FUNC(clRetainSampler,10),			\
349 	ECL_FUNC(clReleaseSampler,10),			\
350 	ECL_FUNC(clGetSamplerInfo,10),			\
351 	ECL_FUNC(clCreateProgramWithSource,10),		\
352 	ECL_FUNC(clCreateProgramWithBinary,10),		\
353 	ECL_FUNC(clCreateProgramWithBuiltInKernels,12), \
354 	ECL_FUNC(clRetainProgram,10),			\
355 	ECL_FUNC(clReleaseProgram,10),			\
356 	ECL_FUNC(clBuildProgram,10),			\
357 	ECL_FUNC(clCompileProgram,12),			\
358 	ECL_FUNC(clLinkProgram,12),			\
359 	ECL_FUNC(clUnloadPlatformCompiler,12),		\
360 	ECL_FUNC(clGetProgramInfo,10),			\
361 	ECL_FUNC(clGetProgramBuildInfo,10),		\
362 	ECL_FUNC(clCreateKernel,10),			\
363 	ECL_FUNC(clCreateKernelsInProgram,10),		\
364 	ECL_FUNC(clSetKernelArg,10),			\
365 	ECL_FUNC(clSetKernelArgSVMPointer,20),		\
366 	ECL_FUNC(clSetKernelExecInfo,20),		\
367 	ECL_FUNC(clRetainKernel,10),			\
368 	ECL_FUNC(clReleaseKernel,10),			\
369 	ECL_FUNC(clGetKernelInfo,10),			\
370 	ECL_FUNC(clGetKernelArgInfo,12),		\
371 	ECL_FUNC(clGetKernelWorkGroupInfo,10),		\
372 	ECL_FUNC(clWaitForEvents,10),			\
373 	ECL_FUNC(clGetEventInfo,10),			\
374 	ECL_FUNC(clCreateUserEvent,11),			\
375 	ECL_FUNC(clRetainEvent,10),			\
376 	ECL_FUNC(clReleaseEvent,10),			\
377 	ECL_FUNC(clSetUserEventStatus,11),		\
378 	ECL_FUNC(clSetEventCallback,11),		\
379 	ECL_FUNC(clGetEventProfilingInfo,10),		\
380 	ECL_FUNC(clFlush,10),				\
381 	ECL_FUNC(clFinish,10),				\
382 	ECL_FUNC(clEnqueueReadBuffer,10),		\
383 	ECL_FUNC(clEnqueueReadBufferRect,11),		\
384 	ECL_FUNC(clEnqueueWriteBuffer,10),		\
385 	ECL_FUNC(clEnqueueWriteBufferRect,11),		\
386 	ECL_FUNC(clEnqueueFillBuffer,12),		\
387 	ECL_FUNC(clEnqueueCopyBuffer,10),		\
388 	ECL_FUNC(clEnqueueCopyBufferRect,11),		\
389 	ECL_FUNC(clEnqueueReadImage,10),		\
390 	ECL_FUNC(clEnqueueWriteImage,10),		\
391 	ECL_FUNC(clEnqueueFillImage,12),		\
392 	ECL_FUNC(clEnqueueCopyImage,10),		\
393 	ECL_FUNC(clEnqueueCopyImageToBuffer,10),	\
394 	ECL_FUNC(clEnqueueCopyBufferToImage,10),	\
395 	ECL_FUNC(clEnqueueMapBuffer,10),		\
396 	ECL_FUNC(clEnqueueMapImage,10),			\
397 	ECL_FUNC(clEnqueueUnmapMemObject,10),		\
398 	ECL_FUNC(clEnqueueMigrateMemObjects,12),	\
399 	ECL_FUNC(clEnqueueNDRangeKernel,10),		\
400 	ECL_FUNC(clEnqueueTask,10),			\
401 	ECL_FUNC(clEnqueueNativeKernel,10),		\
402 	ECL_FUNC(clEnqueueMarkerWithWaitList,12),	\
403 	ECL_FUNC(clEnqueueBarrierWithWaitList,12),	\
404 	ECL_FUNC(clEnqueueSVMFree,20),			\
405 	ECL_FUNC(clEnqueueSVMMemcpy,20),		\
406 	ECL_FUNC(clEnqueueSVMMemFill,20),		\
407 	ECL_FUNC(clEnqueueSVMMap,20),				\
408 	ECL_FUNC(clEnqueueSVMUnmap,20),				\
409 	ECL_FUNC(clGetExtensionFunctionAddressForPlatform,12),	\
410 	ECL_FUNC(clCreateImage2D,10),				\
411 	ECL_FUNC(clCreateImage3D,10),				\
412 	ECL_FUNC(clEnqueueMarker,10),				\
413 	ECL_FUNC(clEnqueueWaitForEvents,10),			\
414 	ECL_FUNC(clEnqueueBarrier,10),				\
415 	ECL_FUNC(clUnloadCompiler,10),				\
416 	ECL_FUNC(clGetExtensionFunctionAddress,10)
417 
418 #include "ecl_types.h"
419 
420 #undef  ECL_FUNC
421 #define ECL_FUNC(nm,vsn) i_##nm
422 typedef enum {
423     ECL_FUNC_LIST
424 } ecl_func_index_t;
425 
426 #undef  ECL_FUNC
427 #define ECL_FUNC(nm,vsn)			\
428     [i_##nm] = { .name = #nm, .func = NULL, .version = (vsn) }
429 
430 ecl_func_t ecl_function[] = {
431     ECL_FUNC_LIST,
432     { NULL, NULL, 0 }
433 };
434 
435 #define ECL_FUNC_PTR(nm) ecl_function[i_##nm].func
436 #define ECL_FUNC_VERSION(nm) ecl_function[i_##nm].version
437 #define ECL_CALL(nm) ((t_##nm)(ecl_function[i_##nm].func))
438 
439 static void* ecl_context_main(void* arg);
440 
441 static int ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info);
442 
443 static int ecl_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data,
444 			 ERL_NIF_TERM load_info);
445 
446 static void ecl_unload(ErlNifEnv* env, void* priv_data);
447 
448 static int ecl_load_dynfunctions(ecl_env_t* ecl);
449 
450 static ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc,
451 				 const ERL_NIF_TERM argv[]);
452 
453 static ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc,
454 			    const ERL_NIF_TERM argv[]);
455 
456 static ERL_NIF_TERM ecl_get_platform_ids(ErlNifEnv* env, int argc,
457 					 const ERL_NIF_TERM argv[]);
458 
459 static ERL_NIF_TERM ecl_get_platform_info(ErlNifEnv* env, int argc,
460 					  const ERL_NIF_TERM argv[]);
461 
462 static ERL_NIF_TERM ecl_get_device_ids(ErlNifEnv* env, int argc,
463 				       const ERL_NIF_TERM argv[]);
464 
465 #if CL_VERSION_1_2 == 1
466 static ERL_NIF_TERM ecl_create_sub_devices(ErlNifEnv* env, int argc,
467 					   const ERL_NIF_TERM argv[]);
468 #endif
469 
470 static ERL_NIF_TERM ecl_get_device_info(ErlNifEnv* env, int argc,
471 					const ERL_NIF_TERM argv[]);
472 
473 static ERL_NIF_TERM ecl_create_context(ErlNifEnv* env, int argc,
474 				       const ERL_NIF_TERM argv[]);
475 
476 static ERL_NIF_TERM ecl_get_context_info(ErlNifEnv* env, int argc,
477 					 const ERL_NIF_TERM argv[]);
478 
479 static ERL_NIF_TERM ecl_create_queue(ErlNifEnv* env, int argc,
480 				     const ERL_NIF_TERM argv[]);
481 
482 static ERL_NIF_TERM ecl_get_queue_info(ErlNifEnv* env, int argc,
483 				       const ERL_NIF_TERM argv[]);
484 
485 static ERL_NIF_TERM ecl_create_buffer(ErlNifEnv* env, int argc,
486 				      const ERL_NIF_TERM argv[]);
487 
488 #if CL_VERSION_1_1 == 1
489 static ERL_NIF_TERM ecl_create_sub_buffer(ErlNifEnv* env, int argc,
490 					  const ERL_NIF_TERM argv[]);
491 #endif
492 
493 static ERL_NIF_TERM ecl_create_image2d(ErlNifEnv* env, int argc,
494 				       const ERL_NIF_TERM argv[]);
495 
496 static ERL_NIF_TERM ecl_create_image3d(ErlNifEnv* env, int argc,
497 				       const ERL_NIF_TERM argv[]);
498 
499 #if CL_VERSION_1_2 == 1
500 static ERL_NIF_TERM ecl_create_image(ErlNifEnv* env, int argc,
501 				     const ERL_NIF_TERM argv[]);
502 #endif
503 
504 static ERL_NIF_TERM ecl_get_supported_image_formats(ErlNifEnv* env, int argc,
505 						    const ERL_NIF_TERM argv[]);
506 
507 static ERL_NIF_TERM ecl_get_mem_object_info(ErlNifEnv* env, int argc,
508 					    const ERL_NIF_TERM argv[]);
509 
510 static ERL_NIF_TERM ecl_get_image_info(ErlNifEnv* env, int argc,
511 				       const ERL_NIF_TERM argv[]);
512 
513 static ERL_NIF_TERM ecl_create_sampler(ErlNifEnv* env, int argc,
514 				       const ERL_NIF_TERM argv[]);
515 
516 static ERL_NIF_TERM ecl_get_sampler_info(ErlNifEnv* env, int argc,
517 					 const ERL_NIF_TERM argv[]);
518 
519 static ERL_NIF_TERM ecl_create_program_with_source(ErlNifEnv* env, int argc,
520 						   const ERL_NIF_TERM argv[]);
521 static ERL_NIF_TERM ecl_create_program_with_binary(ErlNifEnv* env, int argc,
522 						   const ERL_NIF_TERM argv[]);
523 #if CL_VERSION_1_2 == 1
524 static ERL_NIF_TERM ecl_create_program_with_builtin_kernels(
525     ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
526 #endif
527 static ERL_NIF_TERM ecl_async_build_program(ErlNifEnv* env, int argc,
528 					    const ERL_NIF_TERM argv[]);
529 
530 #if CL_VERSION_1_2 == 1
531 static ERL_NIF_TERM ecl_unload_platform_compiler(ErlNifEnv* env, int argc,
532 						 const ERL_NIF_TERM argv[]);
533 static ERL_NIF_TERM ecl_async_compile_program(ErlNifEnv* env, int argc,
534 					      const ERL_NIF_TERM argv[]);
535 static ERL_NIF_TERM ecl_async_link_program(ErlNifEnv* env, int argc,
536 					   const ERL_NIF_TERM argv[]);
537 #endif
538 
539 static ERL_NIF_TERM ecl_unload_compiler(ErlNifEnv* env, int argc,
540 					const ERL_NIF_TERM argv[]);
541 static ERL_NIF_TERM ecl_get_program_info(ErlNifEnv* env, int argc,
542 					 const ERL_NIF_TERM argv[]);
543 static ERL_NIF_TERM ecl_get_program_build_info(ErlNifEnv* env, int argc,
544 					       const ERL_NIF_TERM argv[]);
545 
546 static ERL_NIF_TERM ecl_create_kernel(ErlNifEnv* env, int argc,
547 				      const ERL_NIF_TERM argv[]);
548 static ERL_NIF_TERM ecl_create_kernels_in_program(ErlNifEnv* env, int argc,
549 						  const ERL_NIF_TERM argv[]);
550 static ERL_NIF_TERM ecl_set_kernel_arg(ErlNifEnv* env, int argc,
551 				       const ERL_NIF_TERM argv[]);
552 static ERL_NIF_TERM ecl_set_kernel_arg_size(ErlNifEnv* env, int argc,
553 					    const ERL_NIF_TERM argv[]);
554 static ERL_NIF_TERM ecl_get_kernel_info(ErlNifEnv* env, int argc,
555 					const ERL_NIF_TERM argv[]);
556 static ERL_NIF_TERM ecl_get_kernel_workgroup_info(ErlNifEnv* env, int argc,
557 						  const ERL_NIF_TERM argv[]);
558 #if CL_VERSION_1_2 == 1
559 static ERL_NIF_TERM ecl_get_kernel_arg_info(ErlNifEnv* env, int argc,
560 					    const ERL_NIF_TERM argv[]);
561 #endif
562 
563 static ERL_NIF_TERM ecl_enqueue_task(ErlNifEnv* env, int argc,
564 				     const ERL_NIF_TERM argv[]);
565 static ERL_NIF_TERM ecl_enqueue_nd_range_kernel(ErlNifEnv* env, int argc,
566 						const ERL_NIF_TERM argv[]);
567 static ERL_NIF_TERM ecl_enqueue_marker(ErlNifEnv* env, int argc,
568 				       const ERL_NIF_TERM argv[]);
569 
570 static ERL_NIF_TERM ecl_enqueue_barrier(ErlNifEnv* env, int argc,
571 					const ERL_NIF_TERM argv[]);
572 
573 #if CL_VERSION_1_2 == 1
574 
575 static ERL_NIF_TERM ecl_enqueue_marker_with_wait_list(ErlNifEnv* env,
576 						      int argc,
577 						      const ERL_NIF_TERM argv[]);
578 static ERL_NIF_TERM ecl_enqueue_barrier_with_wait_list(ErlNifEnv* env,
579 						       int argc,
580 						       const ERL_NIF_TERM argv[]);
581 #endif
582 
583 static ERL_NIF_TERM ecl_enqueue_wait_for_events(ErlNifEnv* env, int argc,
584 						const ERL_NIF_TERM argv[]);
585 
586 static ERL_NIF_TERM ecl_enqueue_read_buffer(ErlNifEnv* env, int argc,
587 					    const ERL_NIF_TERM argv[]);
588 #if CL_VERSION_1_1 == 1
589 static ERL_NIF_TERM ecl_enqueue_read_buffer_rect(ErlNifEnv* env, int argc,
590 						 const ERL_NIF_TERM argv[]);
591 #endif
592 
593 static ERL_NIF_TERM ecl_enqueue_write_buffer(ErlNifEnv* env, int argc,
594 					     const ERL_NIF_TERM argv[]);
595 
596 #if CL_VERSION_1_1 == 1
597 static ERL_NIF_TERM ecl_enqueue_write_buffer_rect(ErlNifEnv* env, int argc,
598 						  const ERL_NIF_TERM argv[]);
599 #endif
600 
601 #if CL_VERSION_1_2 == 1
602 static ERL_NIF_TERM ecl_enqueue_fill_buffer(ErlNifEnv* env, int argc,
603 					    const ERL_NIF_TERM argv[]);
604 #endif
605 
606 static ERL_NIF_TERM ecl_enqueue_read_image(ErlNifEnv* env, int argc,
607 					   const ERL_NIF_TERM argv[]);
608 
609 static ERL_NIF_TERM ecl_enqueue_write_image(ErlNifEnv* env, int argc,
610 					    const ERL_NIF_TERM argv[]);
611 
612 static ERL_NIF_TERM ecl_enqueue_copy_buffer(ErlNifEnv* env, int argc,
613 					    const ERL_NIF_TERM argv[]);
614 
615 #if CL_VERSION_1_1 == 1
616 static ERL_NIF_TERM ecl_enqueue_copy_buffer_rect(ErlNifEnv* env, int argc,
617 						 const ERL_NIF_TERM argv[]);
618 #endif
619 
620 static ERL_NIF_TERM ecl_enqueue_copy_image(ErlNifEnv* env, int argc,
621 					   const ERL_NIF_TERM argv[]);
622 #if CL_VERSION_1_2 == 1
623 static ERL_NIF_TERM ecl_enqueue_fill_image(ErlNifEnv* env, int argc,
624 					   const ERL_NIF_TERM argv[]);
625 #endif
626 
627 
628 static ERL_NIF_TERM ecl_enqueue_copy_image_to_buffer(ErlNifEnv* env, int argc,
629 						     const ERL_NIF_TERM argv[]);
630 
631 static ERL_NIF_TERM ecl_enqueue_copy_buffer_to_image(ErlNifEnv* env, int argc,
632 						     const ERL_NIF_TERM argv[]);
633 
634 static ERL_NIF_TERM ecl_enqueue_map_buffer(ErlNifEnv* env, int argc,
635 					   const ERL_NIF_TERM argv[]);
636 
637 static ERL_NIF_TERM ecl_enqueue_map_image(ErlNifEnv* env, int argc,
638 					  const ERL_NIF_TERM argv[]);
639 
640 static ERL_NIF_TERM ecl_enqueue_unmap_mem_object(ErlNifEnv* env, int argc,
641 						 const ERL_NIF_TERM argv[]);
642 
643 #if CL_VERSION_1_2 == 1
644 static ERL_NIF_TERM ecl_enqueue_migrate_mem_objects(ErlNifEnv* env, int argc,
645 						    const ERL_NIF_TERM argv[]);
646 #endif
647 
648 static ERL_NIF_TERM ecl_async_flush(ErlNifEnv* env, int argc,
649 				    const ERL_NIF_TERM argv[]);
650 
651 static ERL_NIF_TERM ecl_async_finish(ErlNifEnv* env, int argc,
652 				     const ERL_NIF_TERM argv[]);
653 
654 // speical version of clWaitForEvents
655 static ERL_NIF_TERM ecl_async_wait_for_event(ErlNifEnv* env, int argc,
656 					     const ERL_NIF_TERM argv[]);
657 
658 static ERL_NIF_TERM ecl_get_event_info(ErlNifEnv* env, int argc,
659 				       const ERL_NIF_TERM argv[]);
660 
661 
662 ErlNifFunc ecl_funcs[] =
663 {
664     { "noop",                        0, ecl_noop },
665     { "versions",                    0, ecl_versions },
666 
667     // Platform
668     { "get_platform_ids",           0, ecl_get_platform_ids },
669     { "get_platform_info",          2, ecl_get_platform_info },
670 
671     // Devices
672     { "get_device_ids",             2, ecl_get_device_ids },
673 #if CL_VERSION_1_2 == 1
674     { "create_sub_devices",         2, ecl_create_sub_devices },
675 #endif
676     { "get_device_info",            2, ecl_get_device_info },
677 
678     // Context
679     { "create_context",             1, ecl_create_context },
680     { "get_context_info",           2, ecl_get_context_info },
681 
682     // Command queue
683     { "create_queue",               3, ecl_create_queue },
684     { "get_queue_info",             2, ecl_get_queue_info },
685 
686     // Memory object
687     { "create_buffer",              4, ecl_create_buffer },
688 #if CL_VERSION_1_1 == 1
689     { "create_sub_buffer",          4, ecl_create_sub_buffer },
690 #endif
691 
692     { "get_mem_object_info",        2, ecl_get_mem_object_info },
693     { "get_image_info",             2, ecl_get_image_info },
694 
695     { "create_image2d",            7, ecl_create_image2d },
696     { "create_image3d",            9, ecl_create_image3d },
697 #if CL_VERSION_1_2 == 1
698     { "create_image",              5, ecl_create_image },
699 #endif
700     { "get_supported_image_formats",3, ecl_get_supported_image_formats },
701 
702     // Sampler
703     { "create_sampler",             4, ecl_create_sampler },
704     { "get_sampler_info",           2, ecl_get_sampler_info },
705 
706     // Program
707     { "create_program_with_source", 2, ecl_create_program_with_source },
708     { "create_program_with_binary", 3, ecl_create_program_with_binary },
709 #if CL_VERSION_1_2 == 1
710     { "create_program_with_builtin_kernels", 3,
711       ecl_create_program_with_builtin_kernels },
712 #endif
713     { "async_build_program",        3, ecl_async_build_program },
714 #if CL_VERSION_1_2 == 1
715     { "unload_platform_compiler",   1, ecl_unload_platform_compiler },
716 #endif
717 #if CL_VERSION_1_2 == 1
718     { "async_compile_program",      5,   ecl_async_compile_program },
719 #endif
720 #if CL_VERSION_1_2 == 1
721     { "async_link_program",         4,   ecl_async_link_program },
722 #endif
723     { "unload_compiler",            0, ecl_unload_compiler },
724     { "get_program_info",           2, ecl_get_program_info },
725     { "get_program_build_info",     3, ecl_get_program_build_info },
726 
727     // Kernel
728     { "create_kernel",              2, ecl_create_kernel },
729     { "create_kernels_in_program",  1, ecl_create_kernels_in_program },
730     { "set_kernel_arg",             3, ecl_set_kernel_arg },
731     { "set_kernel_arg_size",        3, ecl_set_kernel_arg_size },
732     { "get_kernel_info",            2, ecl_get_kernel_info },
733     { "get_kernel_workgroup_info",  3, ecl_get_kernel_workgroup_info },
734 #if CL_VERSION_1_2 == 1
735     { "get_kernel_arg_info",        3, ecl_get_kernel_arg_info },
736 #endif
737     // Events
738     { "enqueue_task",               4, ecl_enqueue_task },
739     { "enqueue_nd_range_kernel",    6, ecl_enqueue_nd_range_kernel },
740     { "enqueue_marker",             1, ecl_enqueue_marker },
741     { "enqueue_barrier",            1, ecl_enqueue_barrier },
742 #if CL_VERSION_1_2 == 1
743     { "enqueue_barrier_with_wait_list", 2, ecl_enqueue_barrier_with_wait_list },
744     { "enqueue_marker_with_wait_list",  2, ecl_enqueue_marker_with_wait_list },
745 #endif
746     { "enqueue_wait_for_events",    2, ecl_enqueue_wait_for_events },
747     { "enqueue_read_buffer",        5, ecl_enqueue_read_buffer },
748 #if CL_VERSION_1_1 == 1
749     { "enqueue_read_buffer_rect",   10, ecl_enqueue_read_buffer_rect },
750 #endif
751     { "enqueue_write_buffer",       7, ecl_enqueue_write_buffer },
752 #if CL_VERSION_1_1 == 1
753     { "enqueue_write_buffer_rect",  11, ecl_enqueue_write_buffer_rect },
754 #endif
755 #if CL_VERSION_1_2 == 1
756     { "enqueue_fill_buffer",         6, ecl_enqueue_fill_buffer },
757 #endif
758     { "enqueue_read_image",         7, ecl_enqueue_read_image },
759     { "enqueue_write_image",        9, ecl_enqueue_write_image },
760     { "enqueue_copy_buffer",        7, ecl_enqueue_copy_buffer },
761 #if CL_VERSION_1_1 == 1
762     { "enqueue_copy_buffer_rect",  11, ecl_enqueue_copy_buffer_rect },
763 #endif
764     { "enqueue_copy_image",         6, ecl_enqueue_copy_image },
765 #if CL_VERSION_1_2 == 1
766     { "enqueue_fill_image",         6, ecl_enqueue_fill_image },
767 #endif
768     { "enqueue_copy_image_to_buffer", 7, ecl_enqueue_copy_image_to_buffer },
769     { "enqueue_copy_buffer_to_image", 7, ecl_enqueue_copy_buffer_to_image },
770     { "enqueue_map_buffer",           6, ecl_enqueue_map_buffer },
771     { "enqueue_map_image",            6, ecl_enqueue_map_image },
772     { "enqueue_unmap_mem_object",     3, ecl_enqueue_unmap_mem_object },
773 #if CL_VERSION_1_2 == 1
774     { "enqueue_migrate_mem_objects",  4, ecl_enqueue_migrate_mem_objects },
775 #endif
776     { "async_flush",                  1, ecl_async_flush },
777     { "async_finish",                 1, ecl_async_finish },
778     { "async_wait_for_event",         1, ecl_async_wait_for_event },
779     { "get_event_info",               2, ecl_get_event_info }
780 };
781 
782 static ecl_resource_t platform_r;
783 static ecl_resource_t device_r;
784 static ecl_resource_t context_r;
785 static ecl_resource_t command_queue_r;
786 static ecl_resource_t mem_r;
787 static ecl_resource_t sampler_r;
788 static ecl_resource_t program_r;
789 static ecl_resource_t kernel_r;
790 static ecl_resource_t event_r;
791 
792 // General atoms
793 DECL_ATOM(ok);
794 DECL_ATOM(error);
795 DECL_ATOM(unknown);
796 DECL_ATOM(undefined);
797 DECL_ATOM(true);
798 DECL_ATOM(false);
799 
800 // async messages
801 DECL_ATOM(cl_async);
802 DECL_ATOM(cl_event);
803 
804 // Type names
805 DECL_ATOM(platform_t);
806 DECL_ATOM(device_t);
807 DECL_ATOM(context_t);
808 DECL_ATOM(command_queue_t);
809 DECL_ATOM(mem_t);
810 DECL_ATOM(sampler_t);
811 DECL_ATOM(program_t);
812 DECL_ATOM(kernel_t);
813 DECL_ATOM(event_t);
814 
815 // 'cl' type names
816 DECL_ATOM(char);
817 DECL_ATOM(char2);
818 DECL_ATOM(char4);
819 DECL_ATOM(char8);
820 DECL_ATOM(char16);
821 
822 DECL_ATOM(uchar);
823 DECL_ATOM(uchar2);
824 DECL_ATOM(uchar4);
825 DECL_ATOM(uchar8);
826 DECL_ATOM(uchar16);
827 
828 DECL_ATOM(short);
829 DECL_ATOM(short2);
830 DECL_ATOM(short4);
831 DECL_ATOM(short8);
832 DECL_ATOM(short16);
833 
834 DECL_ATOM(ushort);
835 DECL_ATOM(ushort2);
836 DECL_ATOM(ushort4);
837 DECL_ATOM(ushort8);
838 DECL_ATOM(ushort16);
839 
840 DECL_ATOM(int);
841 DECL_ATOM(int2);
842 DECL_ATOM(int4);
843 DECL_ATOM(int8);
844 DECL_ATOM(int16);
845 
846 DECL_ATOM(uint);
847 DECL_ATOM(uint2);
848 DECL_ATOM(uint4);
849 DECL_ATOM(uint8);
850 DECL_ATOM(uint16);
851 
852 DECL_ATOM(long);
853 DECL_ATOM(long2);
854 DECL_ATOM(long4);
855 DECL_ATOM(long8);
856 DECL_ATOM(long16);
857 
858 DECL_ATOM(ulong);
859 DECL_ATOM(ulong2);
860 DECL_ATOM(ulong4);
861 DECL_ATOM(ulong8);
862 DECL_ATOM(ulong16);
863 
864 DECL_ATOM(half);
865 
866 DECL_ATOM(float);
867 DECL_ATOM(float2);
868 DECL_ATOM(float4);
869 DECL_ATOM(float8);
870 DECL_ATOM(float16);
871 
872 DECL_ATOM(double);
873 DECL_ATOM(double2);
874 DECL_ATOM(double4);
875 DECL_ATOM(double8);
876 DECL_ATOM(double16);
877 
878 // records for image creation
879 DECL_ATOM(cl_image_desc);
880 DECL_ATOM(cl_image_format);
881 
882 // Platform info
883 // DECL_ATOM(profile);
884 // DECL_ATOM(version);
885 // DECL_ATOM(name);
886 // DECL_ATOM(vendor);
887 // DECL_ATOM(extensions);
888 
889 // Context info
890 DECL_ATOM(reference_count);
891 DECL_ATOM(devices);
892 DECL_ATOM(properties);
893 
894 // Queue info
895 DECL_ATOM(context);
896 DECL_ATOM(num_devices);
897 DECL_ATOM(device);
898 // DECL_ATOM(reference_count);
899 // DECL_ATOM(properties);
900 
901 // Mem info
902 DECL_ATOM(object_type);
903 DECL_ATOM(flags);
904 DECL_ATOM(size);
905 DECL_ATOM(host_ptr);
906 DECL_ATOM(map_count);
907 // DECL_ATOM(reference_count);
908 // DECL_ATOM(context);
909 
910 // Image info
911 DECL_ATOM(format);
912 DECL_ATOM(element_size);
913 DECL_ATOM(row_pitch);
914 DECL_ATOM(slice_pitch);
915 DECL_ATOM(width);
916 DECL_ATOM(height);
917 DECL_ATOM(depth);
918 
919 // Sampler info
920 // DECL_ATOM(reference_count);
921 // DECL_ATOM(context);
922 DECL_ATOM(normalized_coords);
923 DECL_ATOM(addressing_mode);
924 DECL_ATOM(filter_mode);
925 
926 // Program info
927 // DECL_ATOM(reference_count);
928 // DECL_ATOM(context);
929 DECL_ATOM(num_decices);
930 // DECL_ATOM(devices);
931 DECL_ATOM(source);
932 DECL_ATOM(binary_sizes);
933 DECL_ATOM(binaries);
934 
935 // Build Info
936 DECL_ATOM(status);
937 DECL_ATOM(options);
938 DECL_ATOM(log);
939 DECL_ATOM(binary_type);
940 
941 // Kernel Info
942 DECL_ATOM(function_name);
943 DECL_ATOM(num_args);
944 // DECL_ATOM(reference_count);
945 // DECL_ATOM(context);
946 DECL_ATOM(program);
947 
948 // Event Info
949 DECL_ATOM(command_queue);
950 DECL_ATOM(command_type);
951 // DECL_ATOM(reference_count);
952 DECL_ATOM(execution_status);
953 
954 // Workgroup info
955 DECL_ATOM(work_group_size);
956 DECL_ATOM(compile_work_group_size);
957 // DECL_ATOM(local_mem_size);
958 DECL_ATOM(preferred_work_group_size_multiple);
959 DECL_ATOM(private_mem_size);
960 DECL_ATOM(global_work_size);
961 
962 // Error codes
963 DECL_ATOM(device_not_found);
964 DECL_ATOM(device_not_available);
965 DECL_ATOM(compiler_not_available);
966 DECL_ATOM(mem_object_allocation_failure);
967 DECL_ATOM(out_of_resources);
968 DECL_ATOM(out_of_host_memory);
969 DECL_ATOM(profiling_info_not_available);
970 DECL_ATOM(mem_copy_overlap);
971 DECL_ATOM(image_format_mismatch);
972 DECL_ATOM(image_format_not_supported);
973 DECL_ATOM(build_program_failure);
974 DECL_ATOM(map_failure);
975 DECL_ATOM(invalid_value);
976 DECL_ATOM(invalid_device_type);
977 DECL_ATOM(invalid_platform);
978 DECL_ATOM(invalid_device);
979 DECL_ATOM(invalid_context);
980 DECL_ATOM(invalid_queue_properties);
981 DECL_ATOM(invalid_command_queue);
982 DECL_ATOM(invalid_host_ptr);
983 DECL_ATOM(invalid_mem_object);
984 DECL_ATOM(invalid_image_format_descriptor);
985 DECL_ATOM(invalid_image_size);
986 DECL_ATOM(invalid_sampler);
987 DECL_ATOM(invalid_binary);
988 DECL_ATOM(invalid_build_options);
989 DECL_ATOM(invalid_program);
990 DECL_ATOM(invalid_program_executable);
991 DECL_ATOM(invalid_kernel_name);
992 DECL_ATOM(invalid_kernel_definition);
993 DECL_ATOM(invalid_kernel);
994 DECL_ATOM(invalid_arg_index);
995 DECL_ATOM(invalid_arg_value);
996 DECL_ATOM(invalid_arg_size);
997 DECL_ATOM(invalid_kernel_args);
998 DECL_ATOM(invalid_work_dimension);
999 DECL_ATOM(invalid_work_group_size);
1000 DECL_ATOM(invalid_work_item_size);
1001 DECL_ATOM(invalid_global_offset);
1002 DECL_ATOM(invalid_event_wait_list);
1003 DECL_ATOM(invalid_event);
1004 DECL_ATOM(invalid_operation);
1005 DECL_ATOM(invalid_gl_object);
1006 DECL_ATOM(invalid_buffer_size);
1007 DECL_ATOM(invalid_mip_level);
1008 DECL_ATOM(invalid_global_work_size);
1009 
1010 // cl_device_type
1011 DECL_ATOM(all);
1012 DECL_ATOM(default);
1013 DECL_ATOM(cpu);
1014 DECL_ATOM(gpu);
1015 DECL_ATOM(accelerator);
1016 DECL_ATOM(custom);
1017 
1018 // fp_config
1019 DECL_ATOM(denorm);
1020 DECL_ATOM(inf_nan);
1021 DECL_ATOM(round_to_nearest);
1022 DECL_ATOM(round_to_zero);
1023 DECL_ATOM(round_to_inf);
1024 DECL_ATOM(fma);
1025 DECL_ATOM(soft_float);
1026 DECL_ATOM(correctly_rounded_divide_sqrt);
1027 
1028 // mem_cache_type
1029 DECL_ATOM(none);
1030 DECL_ATOM(read_only);
1031 DECL_ATOM(read_write);
1032 
1033 // local_mem_type
1034 DECL_ATOM(local);
1035 DECL_ATOM(global);
1036 
1037 // exec capability
1038 DECL_ATOM(kernel);
1039 DECL_ATOM(native_kernel);
1040 
1041 // command_queue_properties
1042 DECL_ATOM(out_of_order_exec_mode_enable);
1043 DECL_ATOM(profiling_enable);
1044 
1045 // mem_flags
1046 // DECL_ATOM(read_write);
1047 DECL_ATOM(write_only);
1048 // DECL_ATOM(read_only);
1049 DECL_ATOM(use_host_ptr);
1050 DECL_ATOM(alloc_host_ptr);
1051 DECL_ATOM(copy_host_ptr);
1052 
1053 // migration flags
1054 DECL_ATOM(host);
1055 DECL_ATOM(content_undefined);
1056 
1057 // mem_object_type
1058 DECL_ATOM(buffer);
1059 DECL_ATOM(image2d);
1060 DECL_ATOM(image3d);
1061 // version1.2
1062 DECL_ATOM(image2d_array);
1063 DECL_ATOM(image1d);
1064 DECL_ATOM(image1d_array);
1065 DECL_ATOM(image1d_buffer);
1066 
1067 // addressing_mode
1068 // DECL_ATOM(none);
1069 DECL_ATOM(clamp_to_edge);
1070 DECL_ATOM(clamp);
1071 DECL_ATOM(repeat);
1072 
1073 // filter_mode
1074 DECL_ATOM(nearest);
1075 DECL_ATOM(linear);
1076 
1077 // map_flags
1078 DECL_ATOM(read);
1079 DECL_ATOM(write);
1080 
1081 // build_status
1082 DECL_ATOM(success);
1083 // DECL_ATOM(none);
1084 // DECL_ATOM(error);
1085 DECL_ATOM(in_progress);
1086 
1087 // program_binary_type
1088 // DECL_ATOM(none);
1089 DECL_ATOM(compiled_object);
1090 DECL_ATOM(library);
1091 DECL_ATOM(executable);
1092 
1093 // command_type
1094 DECL_ATOM(ndrange_kernel);
1095 DECL_ATOM(task);
1096 // DECL_ATOM(native_kernel);
1097 DECL_ATOM(read_buffer);
1098 DECL_ATOM(write_buffer);
1099 DECL_ATOM(copy_buffer);
1100 DECL_ATOM(read_image);
1101 DECL_ATOM(write_image);
1102 DECL_ATOM(copy_image);
1103 DECL_ATOM(copy_image_to_buffer);
1104 DECL_ATOM(copy_buffer_to_image);
1105 DECL_ATOM(map_buffer);
1106 DECL_ATOM(map_image);
1107 DECL_ATOM(unmap_mem_object);
1108 DECL_ATOM(marker);
1109 DECL_ATOM(aquire_gl_objects);
1110 DECL_ATOM(release_gl_objects);
1111 DECL_ATOM(migreate_mem_objects);
1112 DECL_ATOM(fill_buffer);
1113 DECL_ATOM(fill_image);
1114 
1115 // execution_status
1116 DECL_ATOM(complete);
1117 DECL_ATOM(running);
1118 DECL_ATOM(submitted);
1119 DECL_ATOM(queued);
1120 
1121 // arguments
1122 DECL_ATOM(region);
1123 
1124 // DECL_ATOM(global);
1125 // DECL_ATOM(local);
1126 DECL_ATOM(constant);
1127 DECL_ATOM(private);
1128 
1129 // DECL_ATOM(read_only);
1130 // DECL_ATOM(write_only);
1131 // DECL_ATOM(read_write);
1132 // DECL_ATOM(none);
1133 
1134 // DECL_ATOM(none);
1135 DECL_ATOM(const);
1136 DECL_ATOM(restrict);
1137 DECL_ATOM(volatile);
1138 
1139 DECL_ATOM(address_qualifier);
1140 DECL_ATOM(access_qualifier);
1141 DECL_ATOM(type_name);
1142 DECL_ATOM(type_qualifier);
1143 // DECL_ATOM(name);
1144 
1145 #define SIZE_1   0x010000
1146 #define SIZE_2   0x020000
1147 #define SIZE_4   0x040000
1148 #define SIZE_8   0x080000
1149 #define SIZE_16  0x100000
1150 
1151 ecl_kv_t kv_cl_type[] = {
1152     { &ATOM(char),     SIZE_1 + OCL_CHAR },
1153     { &ATOM(char2),    SIZE_2 + OCL_CHAR },
1154     { &ATOM(char4),    SIZE_4 + OCL_CHAR },
1155     { &ATOM(char8),    SIZE_8 + OCL_CHAR },
1156     { &ATOM(char16),   SIZE_16 + OCL_CHAR },
1157     { &ATOM(uchar),    SIZE_1 + OCL_UCHAR },
1158     { &ATOM(uchar2),   SIZE_2 + OCL_UCHAR },
1159     { &ATOM(uchar4),   SIZE_4 + OCL_UCHAR },
1160     { &ATOM(uchar8),   SIZE_8 + OCL_UCHAR },
1161     { &ATOM(uchar16),  SIZE_16 + OCL_UCHAR },
1162     { &ATOM(short),    SIZE_1 + OCL_SHORT },
1163     { &ATOM(short2),   SIZE_2 + OCL_SHORT },
1164     { &ATOM(short4),   SIZE_4 + OCL_SHORT },
1165     { &ATOM(short8),   SIZE_8 + OCL_SHORT },
1166     { &ATOM(short16),  SIZE_16 + OCL_SHORT },
1167     { &ATOM(ushort),   SIZE_1 + OCL_USHORT },
1168     { &ATOM(ushort2),  SIZE_2 + OCL_USHORT },
1169     { &ATOM(ushort4),  SIZE_4 + OCL_USHORT },
1170     { &ATOM(ushort8),  SIZE_8 + OCL_USHORT },
1171     { &ATOM(ushort16), SIZE_16 + OCL_USHORT },
1172     { &ATOM(int),      SIZE_1 + OCL_INT },
1173     { &ATOM(int2),     SIZE_2 + OCL_INT },
1174     { &ATOM(int4),     SIZE_4 + OCL_INT },
1175     { &ATOM(int8),     SIZE_8 + OCL_INT },
1176     { &ATOM(int16),    SIZE_16 + OCL_INT },
1177     { &ATOM(uint),     SIZE_1 + OCL_UINT },
1178     { &ATOM(uint2),    SIZE_2 + OCL_UINT },
1179     { &ATOM(uint4),    SIZE_4 + OCL_UINT },
1180     { &ATOM(uint8),    SIZE_8 + OCL_UINT },
1181     { &ATOM(uint16),   SIZE_16 + OCL_UINT },
1182     { &ATOM(long),     SIZE_1 + OCL_LONG },
1183     { &ATOM(long2),    SIZE_2 + OCL_LONG },
1184     { &ATOM(long4),    SIZE_4 + OCL_LONG },
1185     { &ATOM(long8),    SIZE_8 + OCL_LONG },
1186     { &ATOM(long16),   SIZE_16 + OCL_LONG },
1187     { &ATOM(ulong),    SIZE_1 + OCL_ULONG },
1188     { &ATOM(ulong2),   SIZE_2 + OCL_ULONG },
1189     { &ATOM(ulong4),   SIZE_4 + OCL_ULONG },
1190     { &ATOM(ulong8),   SIZE_8 + OCL_ULONG },
1191     { &ATOM(ulong16),  SIZE_16 + OCL_ULONG },
1192     { &ATOM(half),     SIZE_1 + OCL_HALF },
1193     { &ATOM(float),    SIZE_1 + OCL_FLOAT },
1194     { &ATOM(float2),   SIZE_2 + OCL_FLOAT },
1195     { &ATOM(float4),   SIZE_4 + OCL_FLOAT },
1196     { &ATOM(float8),   SIZE_8 + OCL_FLOAT },
1197     { &ATOM(float16),  SIZE_16 + OCL_FLOAT },
1198     { &ATOM(double),   SIZE_1 + OCL_DOUBLE },
1199     { &ATOM(double2),  SIZE_2 + OCL_DOUBLE },
1200     { &ATOM(double4),  SIZE_4 + OCL_DOUBLE },
1201     { &ATOM(double8),  SIZE_8 + OCL_DOUBLE },
1202     { &ATOM(double16), SIZE_16 + OCL_DOUBLE },
1203     { 0, 0 }
1204 };
1205 
1206 ecl_kv_t kv_device_type[] = {  // bitfield
1207     { &ATOM(cpu),         CL_DEVICE_TYPE_CPU },
1208     { &ATOM(gpu),         CL_DEVICE_TYPE_GPU },
1209     { &ATOM(accelerator), CL_DEVICE_TYPE_ACCELERATOR },
1210     { &ATOM(default),     CL_DEVICE_TYPE_DEFAULT },
1211     { &ATOM(all),         CL_DEVICE_TYPE_ALL },
1212 #if CL_VERSION_1_2 == 1
1213     { &ATOM(custom),      CL_DEVICE_TYPE_CUSTOM },
1214 #endif
1215     { 0, 0}
1216 };
1217 
1218 ecl_kv_t kv_fp_config[] = {  // bitfield
1219     { &ATOM(denorm),      CL_FP_DENORM },
1220     { &ATOM(inf_nan),     CL_FP_INF_NAN },
1221     { &ATOM(round_to_nearest), CL_FP_ROUND_TO_NEAREST },
1222     { &ATOM(round_to_zero), CL_FP_ROUND_TO_ZERO },
1223     { &ATOM(round_to_inf), CL_FP_ROUND_TO_INF },
1224     { &ATOM(fma), CL_FP_FMA },
1225 #if CL_VERSION_1_2 == 1
1226     { &ATOM(soft_float), CL_FP_SOFT_FLOAT },
1227     { &ATOM(correctly_rounded_divide_sqrt),CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT},
1228 #endif
1229     { 0, 0 }
1230 };
1231 
1232 ecl_kv_t kv_mem_cache_type[] = {  // enum
1233     { &ATOM(none), CL_NONE },
1234     { &ATOM(read_only), CL_READ_ONLY_CACHE },
1235     { &ATOM(read_write), CL_READ_WRITE_CACHE },
1236     { 0, 0 }
1237 };
1238 
1239 ecl_kv_t kv_local_mem_type[] = {  // enum
1240     { &ATOM(local), CL_LOCAL },
1241     { &ATOM(global), CL_GLOBAL },
1242     { 0, 0 }
1243 };
1244 
1245 ecl_kv_t kv_exec_capabilities[] = {  // bit field
1246     { &ATOM(kernel), CL_EXEC_KERNEL },
1247     { &ATOM(native_kernel), CL_EXEC_NATIVE_KERNEL },
1248     { 0, 0 }
1249 };
1250 
1251 
1252 ecl_kv_t kv_command_queue_properties[] = { // bit field
1253     { &ATOM(out_of_order_exec_mode_enable), CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE },
1254     { &ATOM(profiling_enable), CL_QUEUE_PROFILING_ENABLE },
1255     { 0, 0}
1256 };
1257 
1258 ecl_kv_t kv_mem_flags[] = { // bit field
1259     { &ATOM(read_write), CL_MEM_READ_WRITE },
1260     { &ATOM(write_only), CL_MEM_WRITE_ONLY },
1261     { &ATOM(read_only),  CL_MEM_READ_ONLY },
1262     { &ATOM(use_host_ptr), CL_MEM_USE_HOST_PTR },
1263     { &ATOM(alloc_host_ptr), CL_MEM_ALLOC_HOST_PTR },
1264     { &ATOM(copy_host_ptr), CL_MEM_COPY_HOST_PTR },
1265     { 0, 0 }
1266 };
1267 
1268 #if CL_VERSION_1_2 == 1
1269 ecl_kv_t kv_migration_flags[] = { // bit field
1270     { &ATOM(host), CL_MIGRATE_MEM_OBJECT_HOST },
1271     { &ATOM(content_undefined), CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED},
1272     { 0, 0 }
1273 };
1274 #endif
1275 
1276 ecl_kv_t kv_mem_object_type[] = { // enum
1277     { &ATOM(buffer), CL_MEM_OBJECT_BUFFER },
1278     { &ATOM(image2d), CL_MEM_OBJECT_IMAGE2D },
1279     { &ATOM(image3d), CL_MEM_OBJECT_IMAGE3D },
1280 #if CL_VERSION_1_2 == 1
1281     { &ATOM(image2d_array), CL_MEM_OBJECT_IMAGE2D_ARRAY },
1282     { &ATOM(image1d), CL_MEM_OBJECT_IMAGE1D },
1283     { &ATOM(image1d_array), CL_MEM_OBJECT_IMAGE1D_ARRAY },
1284     { &ATOM(image1d_buffer), CL_MEM_OBJECT_IMAGE1D_BUFFER },
1285 #endif
1286     { 0, 0 }
1287 };
1288 
1289 ecl_kv_t kv_addressing_mode[] = { // enum
1290     { &ATOM(none), CL_ADDRESS_NONE },
1291     { &ATOM(clamp_to_edge), CL_ADDRESS_CLAMP_TO_EDGE },
1292     { &ATOM(clamp), CL_ADDRESS_CLAMP },
1293     { &ATOM(repeat), CL_ADDRESS_REPEAT },
1294     { 0, 0 }
1295 };
1296 
1297 ecl_kv_t kv_filter_mode[] = { // enum
1298     { &ATOM(nearest), CL_FILTER_NEAREST },
1299     { &ATOM(linear),  CL_FILTER_LINEAR },
1300     { 0, 0 }
1301 };
1302 
1303 ecl_kv_t kv_map_flags[] = { // bitfield
1304     { &ATOM(read), CL_MAP_READ },
1305     { &ATOM(write), CL_MAP_WRITE },
1306     { 0, 0 }
1307 };
1308 
1309 ecl_kv_t kv_build_status[] = { // enum
1310     { &ATOM(success), CL_BUILD_SUCCESS },
1311     { &ATOM(none), CL_BUILD_NONE },
1312     { &ATOM(error), CL_BUILD_ERROR },
1313     { &ATOM(in_progress), CL_BUILD_IN_PROGRESS },
1314     { 0, 0 }
1315 };
1316 
1317 #if CL_VERSION_1_2 == 1
1318 ecl_kv_t kv_program_binary_type[] = { // enum
1319     { &ATOM(none), CL_PROGRAM_BINARY_TYPE_NONE },
1320     { &ATOM(compiled_object),  CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT },
1321     { &ATOM(library), CL_PROGRAM_BINARY_TYPE_LIBRARY },
1322     { &ATOM(executable), CL_PROGRAM_BINARY_TYPE_EXECUTABLE },
1323     { 0, 0 }
1324 };
1325 #endif
1326 
1327 ecl_kv_t kv_command_type[] = { // enum
1328     { &ATOM(ndrange_kernel), CL_COMMAND_NDRANGE_KERNEL },
1329     { &ATOM(task),           CL_COMMAND_TASK },
1330     { &ATOM(native_kernel),  CL_COMMAND_NATIVE_KERNEL },
1331     { &ATOM(read_buffer),    CL_COMMAND_READ_BUFFER },
1332     { &ATOM(write_buffer),   CL_COMMAND_WRITE_BUFFER },
1333     { &ATOM(copy_buffer),    CL_COMMAND_COPY_BUFFER },
1334     { &ATOM(read_image),     CL_COMMAND_READ_IMAGE },
1335     { &ATOM(write_image),    CL_COMMAND_WRITE_IMAGE },
1336     { &ATOM(copy_image),     CL_COMMAND_COPY_IMAGE },
1337     { &ATOM(copy_image_to_buffer), CL_COMMAND_COPY_IMAGE_TO_BUFFER },
1338     { &ATOM(copy_buffer_to_image), CL_COMMAND_COPY_BUFFER_TO_IMAGE },
1339     { &ATOM(map_buffer), CL_COMMAND_MAP_BUFFER },
1340     { &ATOM(map_image), CL_COMMAND_MAP_IMAGE },
1341     { &ATOM(unmap_mem_object), CL_COMMAND_UNMAP_MEM_OBJECT },
1342     { &ATOM(marker), CL_COMMAND_MARKER  },
1343     { &ATOM(aquire_gl_objects), CL_COMMAND_ACQUIRE_GL_OBJECTS },
1344     { &ATOM(release_gl_objects), CL_COMMAND_RELEASE_GL_OBJECTS },
1345 #if CL_VERSION_12 == 1
1346     { &ATOM(migreate_mem_objects), CL_COMMAND_MIGRATE_MEM_OBJECTS },
1347     { &ATOM(fill_buffer), CL_COMMAND_FILL_BUFFER },
1348     { &ATOM(fill_image), CL_COMMAND_FILL_IMAGE },
1349 #endif
1350     { 0, 0}
1351 };
1352 
1353 ecl_kv_t kv_execution_status[] = { // enum
1354     { &ATOM(complete),   CL_COMPLETE   },   // same as CL_SUCCESS
1355     { &ATOM(running),    CL_RUNNING    },
1356     { &ATOM(submitted),  CL_SUBMITTED  },
1357     { &ATOM(queued),     CL_QUEUED     },
1358     // the error codes (negative values)
1359     { &ATOM(device_not_found), CL_DEVICE_NOT_FOUND },
1360     { &ATOM(device_not_available), CL_DEVICE_NOT_AVAILABLE },
1361     { &ATOM(compiler_not_available), CL_COMPILER_NOT_AVAILABLE },
1362     { &ATOM(mem_object_allocation_failure), CL_MEM_OBJECT_ALLOCATION_FAILURE },
1363     { &ATOM(out_of_resources), CL_OUT_OF_RESOURCES },
1364     { &ATOM(out_of_host_memory), CL_OUT_OF_HOST_MEMORY },
1365     { &ATOM(profiling_info_not_available), CL_PROFILING_INFO_NOT_AVAILABLE },
1366     { &ATOM(mem_copy_overlap), CL_MEM_COPY_OVERLAP },
1367     { &ATOM(image_format_mismatch), CL_IMAGE_FORMAT_MISMATCH },
1368     { &ATOM(image_format_not_supported), CL_IMAGE_FORMAT_NOT_SUPPORTED },
1369     { &ATOM(build_program_failure), CL_BUILD_PROGRAM_FAILURE },
1370     { &ATOM(map_failure), CL_MAP_FAILURE },
1371     { &ATOM(invalid_value), CL_INVALID_VALUE },
1372     { &ATOM(invalid_device_type), CL_INVALID_DEVICE_TYPE },
1373     { &ATOM(invalid_platform), CL_INVALID_PLATFORM },
1374     { &ATOM(invalid_device), CL_INVALID_DEVICE },
1375     { &ATOM(invalid_context), CL_INVALID_CONTEXT },
1376     { &ATOM(invalid_queue_properties), CL_INVALID_QUEUE_PROPERTIES },
1377     { &ATOM(invalid_command_queue), CL_INVALID_COMMAND_QUEUE },
1378     { &ATOM(invalid_host_ptr), CL_INVALID_HOST_PTR },
1379     { &ATOM(invalid_mem_object), CL_INVALID_MEM_OBJECT },
1380     { &ATOM(invalid_image_format_descriptor), CL_INVALID_IMAGE_FORMAT_DESCRIPTOR },
1381     { &ATOM(invalid_image_size), CL_INVALID_IMAGE_SIZE },
1382     { &ATOM(invalid_sampler), CL_INVALID_SAMPLER },
1383     { &ATOM(invalid_binary), CL_INVALID_BINARY },
1384     { &ATOM(invalid_build_options), CL_INVALID_BUILD_OPTIONS },
1385     { &ATOM(invalid_program), CL_INVALID_PROGRAM },
1386     { &ATOM(invalid_program_executable), CL_INVALID_PROGRAM_EXECUTABLE },
1387     { &ATOM(invalid_kernel_name), CL_INVALID_KERNEL_NAME },
1388     { &ATOM(invalid_kernel_definition), CL_INVALID_KERNEL_DEFINITION },
1389     { &ATOM(invalid_kernel), CL_INVALID_KERNEL },
1390     { &ATOM(invalid_arg_index), CL_INVALID_ARG_INDEX },
1391     { &ATOM(invalid_arg_value), CL_INVALID_ARG_VALUE },
1392     { &ATOM(invalid_arg_size), CL_INVALID_ARG_SIZE },
1393     { &ATOM(invalid_kernel_args), CL_INVALID_KERNEL_ARGS },
1394     { &ATOM(invalid_work_dimension), CL_INVALID_WORK_DIMENSION },
1395     { &ATOM(invalid_work_group_size), CL_INVALID_WORK_GROUP_SIZE },
1396     { &ATOM(invalid_work_item_size), CL_INVALID_WORK_ITEM_SIZE },
1397     { &ATOM(invalid_global_offset), CL_INVALID_GLOBAL_OFFSET },
1398     { &ATOM(invalid_event_wait_list), CL_INVALID_EVENT_WAIT_LIST },
1399     { &ATOM(invalid_event), CL_INVALID_EVENT },
1400     { &ATOM(invalid_operation), CL_INVALID_OPERATION },
1401     { &ATOM(invalid_gl_object), CL_INVALID_GL_OBJECT },
1402     { &ATOM(invalid_buffer_size), CL_INVALID_BUFFER_SIZE },
1403     { &ATOM(invalid_mip_level), CL_INVALID_MIP_LEVEL },
1404     { &ATOM(invalid_global_work_size), CL_INVALID_GLOBAL_WORK_SIZE },
1405     { 0, 0 }
1406 };
1407 
1408 DECL_ATOM(snorm_int8);
1409 DECL_ATOM(snorm_int16);
1410 DECL_ATOM(unorm_int8);
1411 DECL_ATOM(unorm_int16);
1412 DECL_ATOM(unorm_int24);
1413 DECL_ATOM(unorm_short_565);
1414 DECL_ATOM(unorm_short_555);
1415 DECL_ATOM(unorm_int_101010);
1416 DECL_ATOM(signed_int8);
1417 DECL_ATOM(signed_int16);
1418 DECL_ATOM(signed_int32);
1419 DECL_ATOM(unsigned_int8);
1420 DECL_ATOM(unsigned_int16);
1421 DECL_ATOM(unsigned_int32);
1422 DECL_ATOM(half_float);
1423 // DECL_ATOM(float);
1424 
1425 ecl_kv_t kv_channel_type[] = { // enum
1426     { &ATOM(snorm_int8), CL_SNORM_INT8 },
1427     { &ATOM(snorm_int16), CL_SNORM_INT16 },
1428     { &ATOM(unorm_int8), CL_UNORM_INT8 },
1429     { &ATOM(unorm_int16), CL_UNORM_INT16 },
1430     { &ATOM(unorm_short_565), CL_UNORM_SHORT_565 },
1431     { &ATOM(unorm_short_555), CL_UNORM_SHORT_555 },
1432     { &ATOM(unorm_int_101010), CL_UNORM_INT_101010 },
1433     { &ATOM(signed_int8), CL_SIGNED_INT8 },
1434     { &ATOM(signed_int16), CL_SIGNED_INT16 },
1435     { &ATOM(signed_int32), CL_SIGNED_INT32 },
1436     { &ATOM(unsigned_int8), CL_UNSIGNED_INT8 },
1437     { &ATOM(unsigned_int16), CL_UNSIGNED_INT16 },
1438     { &ATOM(unsigned_int32), CL_UNSIGNED_INT32 },
1439     { &ATOM(half_float), CL_HALF_FLOAT },
1440     { &ATOM(float), CL_FLOAT },
1441 #if (CL_VERSION_1_2 == 1) && defined(CL_UNORM_INT24)
1442     { &ATOM(unorm_int24), CL_UNORM_INT24 },
1443 #endif
1444     { 0, 0 }
1445 };
1446 
1447 // channel order
1448 DECL_ATOM(r);
1449 DECL_ATOM(a);
1450 DECL_ATOM(rg);
1451 DECL_ATOM(ra);
1452 DECL_ATOM(rgb);
1453 DECL_ATOM(rgba);
1454 DECL_ATOM(bgra);
1455 DECL_ATOM(argb);
1456 DECL_ATOM(intensity);
1457 DECL_ATOM(luminance);
1458 DECL_ATOM(rx);
1459 DECL_ATOM(rgx);
1460 DECL_ATOM(rgbx);
1461 // DECL_ATOM(depth);
1462 DECL_ATOM(depth_stencil);
1463 
1464 // 1.1 features! in apple 1.0?
1465 #ifndef CL_Rx
1466 #define CL_Rx                                       0x10BA
1467 #endif
1468 
1469 #ifndef CL_RGx
1470 #define CL_RGx                                      0x10BB
1471 #endif
1472 
1473 #ifndef CL_RGBx
1474 #define CL_RGBx                                     0x10BC
1475 #endif
1476 
1477 ecl_kv_t kv_channel_order[] = {
1478     { &ATOM(r), CL_R },
1479     { &ATOM(a), CL_A },
1480     { &ATOM(rg), CL_RG },
1481     { &ATOM(ra), CL_RA },
1482     { &ATOM(rgb), CL_RGB },
1483     { &ATOM(rgba), CL_RGBA },
1484     { &ATOM(bgra), CL_BGRA },
1485     { &ATOM(argb), CL_ARGB },
1486     { &ATOM(intensity), CL_INTENSITY },
1487     { &ATOM(luminance), CL_LUMINANCE },
1488     { &ATOM(rx), CL_Rx },
1489     { &ATOM(rgx), CL_RGx },
1490     { &ATOM(rgbx), CL_RGBx },
1491 #if CL_VERSION_1_2 == 1
1492 #if defined(CL_DEPTH)
1493     { &ATOM(depth), CL_DEPTH },
1494 #endif
1495 #if defined(CL_DEPTH_STENCIL)
1496     { &ATOM(depth_stencil), CL_DEPTH_STENCIL },
1497 #endif
1498 #endif
1499     { 0, 0 }
1500 };
1501 
1502 // partition_property
1503 DECL_ATOM(equally);
1504 DECL_ATOM(by_counts);
1505 DECL_ATOM(by_counts_list_end);
1506 DECL_ATOM(by_affinity_domain);
1507 
1508 #if CL_VERSION_1_2 == 1
1509 ecl_kv_t kv_device_partition_property[] = {
1510     { &ATOM(equally), CL_DEVICE_PARTITION_EQUALLY },
1511     { &ATOM(by_counts), CL_DEVICE_PARTITION_BY_COUNTS },
1512     { &ATOM(by_affinity_domain), CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN },
1513     { &ATOM(undefined), 0 },
1514     { 0, 0}
1515 };
1516 #endif
1517 
1518 DECL_ATOM(numa);
1519 DECL_ATOM(l4_cache);
1520 DECL_ATOM(l3_cache);
1521 DECL_ATOM(l2_cache);
1522 DECL_ATOM(l1_cache);
1523 DECL_ATOM(next_partitionable);
1524 
1525 #if CL_VERSION_1_2 == 1
1526 ecl_kv_t kv_device_affinity_domain[] = {
1527     { &ATOM(numa), CL_DEVICE_AFFINITY_DOMAIN_NUMA },
1528     { &ATOM(l4_cache), CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE },
1529     { &ATOM(l3_cache), CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE },
1530     { &ATOM(l2_cache), CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE },
1531     { &ATOM(l1_cache), CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE },
1532     { &ATOM(next_partitionable), CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE },
1533     { &ATOM(undefined), 0 },
1534     { 0, 0}
1535 };
1536 #endif
1537 
1538 // Device info
1539 DECL_ATOM(type);
1540 DECL_ATOM(vendor_id);
1541 DECL_ATOM(max_compute_units);
1542 DECL_ATOM(max_work_item_dimensions);
1543 DECL_ATOM(max_work_group_size);
1544 DECL_ATOM(max_work_item_sizes);
1545 DECL_ATOM(preferred_vector_width_char);
1546 DECL_ATOM(preferred_vector_width_short);
1547 DECL_ATOM(preferred_vector_width_int);
1548 DECL_ATOM(preferred_vector_width_long);
1549 DECL_ATOM(preferred_vector_width_float);
1550 DECL_ATOM(preferred_vector_width_double);
1551 DECL_ATOM(max_clock_frequency);
1552 DECL_ATOM(address_bits);
1553 DECL_ATOM(max_read_image_args);
1554 DECL_ATOM(max_write_image_args);
1555 DECL_ATOM(max_mem_alloc_size);
1556 DECL_ATOM(image2d_max_width);
1557 DECL_ATOM(image2d_max_height);
1558 DECL_ATOM(image3d_max_width);
1559 DECL_ATOM(image3d_max_height);
1560 DECL_ATOM(image3d_max_depth);
1561 DECL_ATOM(image_support);
1562 DECL_ATOM(max_parameter_size);
1563 DECL_ATOM(max_samplers);
1564 DECL_ATOM(mem_base_addr_align);
1565 DECL_ATOM(min_data_type_align_size);
1566 DECL_ATOM(single_fp_config);
1567 DECL_ATOM(global_mem_cache_type);
1568 DECL_ATOM(global_mem_cacheline_size);
1569 DECL_ATOM(global_mem_cache_size);
1570 DECL_ATOM(global_mem_size);
1571 DECL_ATOM(max_constant_buffer_size);
1572 DECL_ATOM(max_constant_args);
1573 DECL_ATOM(local_mem_type);
1574 DECL_ATOM(local_mem_size);
1575 DECL_ATOM(error_correction_support);
1576 DECL_ATOM(profiling_timer_resolution);
1577 DECL_ATOM(endian_little);
1578 DECL_ATOM(available);
1579 DECL_ATOM(compiler_available);
1580 DECL_ATOM(execution_capabilities);
1581 DECL_ATOM(queue_properties);
1582 DECL_ATOM(name);
1583 DECL_ATOM(vendor);
1584 DECL_ATOM(driver_version);
1585 DECL_ATOM(profile);
1586 DECL_ATOM(version);
1587 DECL_ATOM(extensions);
1588 DECL_ATOM(platform);
1589 
1590 // cl_khr_fp64 extension || CL_VERSION_1_2 == 1
1591 DECL_ATOM(double_fp_config);
1592 // cl_khr_fp16 extension || CL_VERSION_1_2 == 1
1593 DECL_ATOM(half_fp_config);
1594 // 1.2
1595 DECL_ATOM(preferred_vector_width_half);
1596 DECL_ATOM(host_unified_memory);
1597 DECL_ATOM(native_vector_width_char);
1598 DECL_ATOM(native_vector_width_short);
1599 DECL_ATOM(native_vector_width_int);
1600 DECL_ATOM(native_vector_width_long);
1601 DECL_ATOM(native_vector_width_float);
1602 DECL_ATOM(native_vector_width_double);
1603 DECL_ATOM(native_vector_width_half);
1604 DECL_ATOM(opencl_c_version);
1605 DECL_ATOM(linker_available);
1606 DECL_ATOM(built_in_kernels);
1607 DECL_ATOM(image_max_buffer_size);
1608 DECL_ATOM(image_max_array_size);
1609 DECL_ATOM(parent_device);
1610 DECL_ATOM(partition_max_sub_devices);
1611 DECL_ATOM(partition_properties);
1612 DECL_ATOM(partition_affinity_domain);
1613 DECL_ATOM(partition_type);
1614 // DECL_ATOM(reference_count);
1615 DECL_ATOM(preferred_interop_user_sync);
1616 DECL_ATOM(printf_buffer_size);
1617 DECL_ATOM(image_pitch_alignment);
1618 DECL_ATOM(image_base_address_alignment);
1619 // cl_nv_device_attribute_query extension
1620 DECL_ATOM(compute_capability_major_nv);
1621 DECL_ATOM(compute_capability_minor_nv);
1622 DECL_ATOM(registers_per_block_nv);
1623 DECL_ATOM(warp_size_nv);
1624 DECL_ATOM(gpu_overlap_nv);
1625 DECL_ATOM(kernel_exec_timeout_nv);
1626 DECL_ATOM(device_integrated_memory_nv);
1627 
1628 // Map device info index 0...N => cl_device_info x Data type
1629 ecl_info_t device_info[] =
1630 {
1631     { &ATOM(type), CL_DEVICE_TYPE, false, OCL_DEVICE_TYPE, kv_device_type, 0 },
1632     { &ATOM(vendor_id), CL_DEVICE_VENDOR_ID, false, OCL_UINT, 0, 0 },
1633     { &ATOM(max_compute_units), CL_DEVICE_MAX_COMPUTE_UNITS, false, OCL_UINT, 0, 0 },
1634     { &ATOM(max_work_item_dimensions), CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, false, OCL_UINT, 0, 0 },
1635     { &ATOM(max_work_group_size), CL_DEVICE_MAX_WORK_GROUP_SIZE, false, OCL_SIZE, 0, 0 },
1636     { &ATOM(max_work_item_sizes), CL_DEVICE_MAX_WORK_ITEM_SIZES, true, OCL_SIZE, 0, 0 },
1637     { &ATOM(preferred_vector_width_char), CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, false, OCL_UINT, 0, 0 },
1638     { &ATOM(preferred_vector_width_short), CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, false, OCL_UINT,  0, 0 },
1639     { &ATOM(preferred_vector_width_int), CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, false, OCL_UINT, 0, 0 },
1640     { &ATOM(preferred_vector_width_long), CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, false,OCL_UINT, 0, 0 },
1641     { &ATOM(preferred_vector_width_float), CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, false, OCL_UINT, 0, 0 },
1642     { &ATOM(preferred_vector_width_double), CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, false, OCL_UINT, 0, 0 },
1643     { &ATOM(max_clock_frequency), CL_DEVICE_MAX_CLOCK_FREQUENCY, false, OCL_UINT, 0, 0 },
1644     { &ATOM(address_bits), CL_DEVICE_ADDRESS_BITS, false, OCL_UINT, 0, 0 },
1645     { &ATOM(max_read_image_args), CL_DEVICE_MAX_READ_IMAGE_ARGS, false, OCL_UINT, 0, 0 },
1646     { &ATOM(max_write_image_args), CL_DEVICE_MAX_WRITE_IMAGE_ARGS, false, OCL_UINT, 0, 0 },
1647     { &ATOM(max_mem_alloc_size), CL_DEVICE_MAX_MEM_ALLOC_SIZE, false, OCL_ULONG, 0, 0 },
1648     { &ATOM(image2d_max_width), CL_DEVICE_IMAGE2D_MAX_WIDTH, false, OCL_SIZE, 0, 0 },
1649     { &ATOM(image2d_max_height), CL_DEVICE_IMAGE2D_MAX_HEIGHT, false, OCL_SIZE, 0, 0 },
1650     { &ATOM(image3d_max_width), CL_DEVICE_IMAGE3D_MAX_WIDTH, false, OCL_SIZE, 0, 0 },
1651     { &ATOM(image3d_max_height), CL_DEVICE_IMAGE3D_MAX_HEIGHT, false, OCL_SIZE, 0, 0 },
1652     { &ATOM(image3d_max_depth), CL_DEVICE_IMAGE3D_MAX_DEPTH, false, OCL_SIZE, 0, 0 },
1653     { &ATOM(image_support), CL_DEVICE_IMAGE_SUPPORT, false, OCL_BOOL, 0, 0 },
1654     { &ATOM(max_parameter_size), CL_DEVICE_MAX_PARAMETER_SIZE, false, OCL_SIZE, 0, 0 },
1655     { &ATOM(max_samplers), CL_DEVICE_MAX_SAMPLERS, false, OCL_UINT, 0, 0 },
1656     { &ATOM(mem_base_addr_align), CL_DEVICE_MEM_BASE_ADDR_ALIGN, false, OCL_UINT, 0, 0 },
1657     { &ATOM(min_data_type_align_size), CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, false, OCL_UINT, 0, 0 },
1658     { &ATOM(single_fp_config), CL_DEVICE_SINGLE_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config, 0 },
1659     { &ATOM(global_mem_cache_type), CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, false, OCL_DEVICE_GLOBAL_MEM_CACHE_TYPE, kv_mem_cache_type, 0 },
1660     { &ATOM(global_mem_cacheline_size), CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, false, OCL_UINT, 0, 0 },
1661     { &ATOM(global_mem_cache_size), CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, false, OCL_ULONG, 0, 0 },
1662     { &ATOM(global_mem_size), CL_DEVICE_GLOBAL_MEM_SIZE, false, OCL_ULONG, 0, 0 },
1663     { &ATOM(max_constant_buffer_size), CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,  false, OCL_ULONG, 0, 0 },
1664     { &ATOM(max_constant_args), CL_DEVICE_MAX_CONSTANT_ARGS, false, OCL_UINT, 0, 0 },
1665     { &ATOM(local_mem_type), CL_DEVICE_LOCAL_MEM_TYPE, false, OCL_DEVICE_LOCAL_MEM_TYPE, kv_local_mem_type, 0 },
1666     { &ATOM(local_mem_size), CL_DEVICE_LOCAL_MEM_SIZE,  false, OCL_ULONG, 0, 0 },
1667     { &ATOM(error_correction_support), CL_DEVICE_ERROR_CORRECTION_SUPPORT, false,  OCL_BOOL, 0, 0 },
1668     { &ATOM(profiling_timer_resolution), CL_DEVICE_PROFILING_TIMER_RESOLUTION, false,  OCL_SIZE, 0, 0 },
1669     { &ATOM(endian_little), CL_DEVICE_ENDIAN_LITTLE, false, OCL_BOOL, 0, 0 },
1670     { &ATOM(available), CL_DEVICE_AVAILABLE,  false, OCL_BOOL, 0, 0 },
1671     { &ATOM(compiler_available), CL_DEVICE_COMPILER_AVAILABLE, false, OCL_BOOL, 0, 0 },
1672     { &ATOM(execution_capabilities), CL_DEVICE_EXECUTION_CAPABILITIES, false, OCL_DEVICE_EXEC_CAPABILITIES, kv_exec_capabilities, 0 },
1673     { &ATOM(queue_properties), CL_DEVICE_QUEUE_PROPERTIES, false, OCL_QUEUE_PROPERTIES, kv_command_queue_properties, 0 },
1674     { &ATOM(name), CL_DEVICE_NAME, false, OCL_STRING, 0, 0 },
1675     { &ATOM(vendor), CL_DEVICE_VENDOR, false, OCL_STRING, 0, 0 },
1676     { &ATOM(driver_version), CL_DRIVER_VERSION, false, OCL_STRING, 0, 0 },
1677     { &ATOM(profile), CL_DEVICE_PROFILE, false, OCL_STRING, 0, 0 },
1678     { &ATOM(version), CL_DEVICE_VERSION, false, OCL_STRING, 0, 0 },
1679     { &ATOM(extensions), CL_DEVICE_EXTENSIONS, false, OCL_STRING, 0, 0 },
1680     { &ATOM(platform), CL_DEVICE_PLATFORM, false, OCL_PLATFORM, 0, 0 },
1681 #if CL_VERSION_1_1 == 1
1682     { &ATOM(preferred_vector_width_half), CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF,false, OCL_UINT, 0, 0 },
1683     { &ATOM(host_unified_memory), CL_DEVICE_HOST_UNIFIED_MEMORY,false,OCL_BOOL,0, 0},
1684     { &ATOM(native_vector_width_char), CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,false,OCL_UINT, 0, 0},
1685     { &ATOM(native_vector_width_short), CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,false,OCL_UINT, 0, 0},
1686     { &ATOM(native_vector_width_int), CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,false,OCL_UINT, 0, 0},
1687     { &ATOM(native_vector_width_long), CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,false,OCL_UINT, 0, 0},
1688     { &ATOM(native_vector_width_float), CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,false,OCL_UINT, 0, 0},
1689     { &ATOM(native_vector_width_double), CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,false,OCL_UINT, 0, 0},
1690     { &ATOM(native_vector_width_half), CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF,false,OCL_UINT, 0, 0},
1691     { &ATOM(opencl_c_version), CL_DEVICE_OPENCL_C_VERSION,false,OCL_STRING, 0, 0},
1692 #endif
1693     // cl_khr_fp64 extension || CL_VERSION_1_2 == 1
1694 #if CL_DEVICE_DOUBLE_FP_CONFIG
1695     { &ATOM(double_fp_config), CL_DEVICE_DOUBLE_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config, 0 },
1696 #endif
1697     // cl_khr_fp16 extension || CL_VERSION_1_2 == 1
1698 #if CL_DEVICE_HALF_FP_CONFIG
1699     { &ATOM(half_fp_config), CL_DEVICE_HALF_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config, 0 },
1700 #endif
1701 #if CL_VERSION_1_2 == 1
1702     { &ATOM(linker_available), CL_DEVICE_LINKER_AVAILABLE,false,OCL_BOOL, 0, 0},
1703     { &ATOM(built_in_kernels), CL_DEVICE_BUILT_IN_KERNELS,false, OCL_STRING, 0, 0},
1704     { &ATOM(image_max_buffer_size), CL_DEVICE_IMAGE_MAX_BUFFER_SIZE,false,OCL_SIZE, 0, 0},
1705     { &ATOM(image_max_array_size), CL_DEVICE_IMAGE_MAX_ARRAY_SIZE,false,OCL_SIZE, 0, 0},
1706     { &ATOM(parent_device), CL_DEVICE_PARENT_DEVICE,false,OCL_DEVICE, 0, 0},
1707     { &ATOM(partition_max_sub_devices), CL_DEVICE_PARTITION_MAX_SUB_DEVICES,false,OCL_SIZE, 0, 0},
1708     { &ATOM(partition_properties), CL_DEVICE_PARTITION_PROPERTIES,true,
1709       OCL_ENUM, kv_device_partition_property, 0},
1710 
1711     { &ATOM(partition_affinity_domain), CL_DEVICE_PARTITION_AFFINITY_DOMAIN,false,OCL_ENUM, kv_device_affinity_domain, 0 },
1712 
1713     { &ATOM(partition_type), CL_DEVICE_PARTITION_TYPE, false, OCL_DEVICE_PARTITION, 0, 0},
1714     { &ATOM(reference_count), CL_DEVICE_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1715     { &ATOM(preferred_interop_user_sync), CL_DEVICE_PREFERRED_INTEROP_USER_SYNC,false, OCL_BOOL, 0, 0},
1716     { &ATOM(printf_buffer_size), CL_DEVICE_PRINTF_BUFFER_SIZE,false, OCL_SIZE, 0, 0 },
1717 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1718     { &ATOM(image_pitch_alignment), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, false, OCL_SIZE, 0, 0 },
1719 #endif
1720 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1721     { &ATOM(image_base_address_alignment), CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, false, OCL_SIZE, 0, 0 },
1722 #endif
1723 #endif
1724 
1725     // cl_nv_device_attribute_query extension
1726 #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
1727     { &ATOM(compute_capability_major_nv), CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, false, OCL_UINT, 0, 0},
1728 #endif
1729 #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV
1730     { &ATOM(compute_capability_minor_nv), CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, false, OCL_UINT, 0, 0},
1731 #endif
1732 #ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV
1733     { &ATOM(registers_per_block_nv),CL_DEVICE_REGISTERS_PER_BLOCK_NV, false, OCL_UINT, 0, 0},
1734 #endif
1735 #ifdef CL_DEVICE_WARP_SIZE_NV
1736     { &ATOM(warp_size_nv),CL_DEVICE_WARP_SIZE_NV, false, OCL_UINT, 0, 0},
1737 #endif
1738 #ifdef CL_DEVICE_GPU_OVERLAP_NV
1739     { &ATOM(gpu_overlap_nv),CL_DEVICE_GPU_OVERLAP_NV, false, OCL_BOOL, 0, 0},
1740 #endif
1741 #ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV
1742     { &ATOM(kernel_exec_timeout_nv), CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, false, OCL_BOOL, 0, 0},
1743 #endif
1744 #ifdef CL_DEVICE_INTEGRATED_MEMORY_NV
1745     { &ATOM(device_integrated_memory_nv),CL_DEVICE_INTEGRATED_MEMORY_NV, false, OCL_BOOL, 0, 0},
1746 #endif
1747 
1748 };
1749 
1750 // Map device info index 0...N => cl_device_info x Data type
1751 ecl_info_t platform_info[] =
1752 {
1753     { &ATOM(profile), CL_PLATFORM_PROFILE, false, OCL_STRING, 0, 0 },
1754     { &ATOM(version), CL_PLATFORM_VERSION, false, OCL_STRING, 0, 0 },
1755     { &ATOM(name),    CL_PLATFORM_NAME,    false, OCL_STRING, 0, 0 },
1756     { &ATOM(vendor),  CL_PLATFORM_VENDOR,  false, OCL_STRING, 0, 0 },
1757     { &ATOM(extensions), CL_PLATFORM_EXTENSIONS, false, OCL_STRING, 0, 0 }
1758 };
1759 
1760 ecl_info_t context_info[] =
1761 {
1762     { &ATOM(reference_count), CL_CONTEXT_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1763     { &ATOM(devices), CL_CONTEXT_DEVICES, true, OCL_DEVICE, 0, 0 },
1764     { &ATOM(properties), CL_CONTEXT_PROPERTIES, true, OCL_INT, 0, 0 }
1765 };
1766 
1767 ecl_info_t queue_info[] =
1768 {
1769     { &ATOM(context), CL_QUEUE_CONTEXT, false, OCL_CONTEXT, 0, 0 },
1770     { &ATOM(device),  CL_QUEUE_DEVICE, false, OCL_DEVICE, 0, 0 },
1771     { &ATOM(reference_count), CL_QUEUE_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1772     { &ATOM(properties), CL_QUEUE_PROPERTIES, false, OCL_QUEUE_PROPERTIES, kv_command_queue_properties, 0 }
1773 };
1774 
1775 ecl_info_t mem_info[] =
1776 {
1777     { &ATOM(object_type), CL_MEM_TYPE, false, OCL_MEM_OBJECT_TYPE, kv_mem_object_type, 0 },
1778     { &ATOM(flags), CL_MEM_FLAGS, false, OCL_MEM_FLAGS, kv_mem_flags, 0 },
1779     { &ATOM(size),  CL_MEM_SIZE,  false, OCL_SIZE, 0, 0 },
1780     // FIXME: pointer!! map it (binary resource?)
1781     { &ATOM(host_ptr), CL_MEM_HOST_PTR, false, OCL_POINTER, 0, 0 },
1782     { &ATOM(map_count), CL_MEM_MAP_COUNT, false, OCL_UINT, 0, 0 },
1783     { &ATOM(reference_count), CL_MEM_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1784     { &ATOM(context), CL_MEM_CONTEXT, false, OCL_CONTEXT, 0, 0 }
1785 };
1786 
1787 ecl_info_t image_info[] =
1788 {
1789     { &ATOM(format), CL_IMAGE_FORMAT, false, OCL_IMAGE_FORMAT, 0, 0 },
1790     { &ATOM(element_size), CL_IMAGE_ELEMENT_SIZE, false, OCL_SIZE, 0, 0 },
1791     { &ATOM(row_pitch),  CL_IMAGE_ROW_PITCH,  false, OCL_SIZE, 0, 0 },
1792     { &ATOM(slice_pitch), CL_IMAGE_SLICE_PITCH, false, OCL_SIZE, 0, 0 },
1793     { &ATOM(width), CL_IMAGE_WIDTH, false, OCL_SIZE, 0, 0 },
1794     { &ATOM(height), CL_IMAGE_HEIGHT, false, OCL_SIZE, 0, 0 },
1795     { &ATOM(depth), CL_IMAGE_DEPTH, false, OCL_SIZE, 0, 0 }
1796 };
1797 
1798 ecl_info_t sampler_info[] =
1799 {
1800     { &ATOM(reference_count), CL_SAMPLER_REFERENCE_COUNT, false, OCL_UINT, 0, 0},
1801     { &ATOM(context), CL_SAMPLER_CONTEXT, false,  OCL_CONTEXT, 0, 0 },
1802     { &ATOM(normalized_coords), CL_SAMPLER_NORMALIZED_COORDS, false, OCL_BOOL, 0, 0 },
1803     {  &ATOM(addressing_mode), CL_SAMPLER_ADDRESSING_MODE, false, OCL_SAMPLER_ADDRESSING_MODE, kv_addressing_mode, 0 },
1804     { &ATOM(filter_mode), CL_SAMPLER_FILTER_MODE, false, OCL_SAMPLER_FILTER_MODE, kv_filter_mode, 0 }
1805 };
1806 
1807 ecl_info_t program_info[] = {
1808     { &ATOM(reference_count), CL_PROGRAM_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1809     { &ATOM(context), CL_PROGRAM_CONTEXT, false, OCL_CONTEXT, 0, 0 },
1810     { &ATOM(num_devices), CL_PROGRAM_NUM_DEVICES, false, OCL_UINT, 0, 0 },
1811     { &ATOM(devices), CL_PROGRAM_DEVICES, true, OCL_DEVICE, 0, 0 },
1812     { &ATOM(source), CL_PROGRAM_SOURCE, false, OCL_STRING, 0, 0 },
1813     { &ATOM(binary_sizes), CL_PROGRAM_BINARY_SIZES, true, OCL_SIZE, 0, 0 },
1814     { &ATOM(binaries), CL_PROGRAM_BINARIES, true, OCL_STRING, 0, 0 }
1815 };
1816 
1817 ecl_info_t build_info[] = {
1818     { &ATOM(status), CL_PROGRAM_BUILD_STATUS, false, OCL_BUILD_STATUS, kv_build_status, 0 },
1819     { &ATOM(options), CL_PROGRAM_BUILD_OPTIONS, false, OCL_STRING, 0, 0 },
1820     { &ATOM(log), CL_PROGRAM_BUILD_LOG, false, OCL_STRING, 0, 0 },
1821 #if CL_VERSION_1_2 == 1
1822     { &ATOM(binary_type), CL_PROGRAM_BINARY_TYPE, false, OCL_PROGRAM_BINARY_TYPE, kv_program_binary_type, 0 },
1823 #endif
1824 };
1825 
1826 ecl_info_t kernel_info[] = {
1827     { &ATOM(function_name), CL_KERNEL_FUNCTION_NAME, false, OCL_STRING, 0, 0 },
1828     { &ATOM(num_args), CL_KERNEL_NUM_ARGS, false, OCL_UINT, 0, 0 },
1829     { &ATOM(reference_count), CL_KERNEL_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1830     { &ATOM(context), CL_KERNEL_CONTEXT, false, OCL_CONTEXT, 0, 0 },
1831     { &ATOM(program), CL_KERNEL_PROGRAM, false, OCL_PROGRAM, 0, 0 }
1832 };
1833 
1834 ecl_info_t workgroup_info[] = {
1835     { &ATOM(work_group_size), CL_KERNEL_WORK_GROUP_SIZE, false, OCL_SIZE, 0, sizeof(size_t)},
1836     { &ATOM(compile_work_group_size), CL_KERNEL_COMPILE_WORK_GROUP_SIZE, true, OCL_SIZE, 0, sizeof(size_t[3])},
1837     { &ATOM(local_mem_size), CL_KERNEL_LOCAL_MEM_SIZE, false, OCL_ULONG, 0, sizeof(cl_ulong)},
1838 #if CL_VERSION_1_1 == 1
1839     { &ATOM(preferred_work_group_size_multiple), CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, false,  OCL_SIZE, 0, sizeof(size_t) },
1840     { &ATOM(private_mem_size), CL_KERNEL_PRIVATE_MEM_SIZE, false, OCL_ULONG, 0, sizeof(cl_ulong)},
1841 #endif
1842 #if CL_VERSION_1_2 == 1
1843     { &ATOM(global_work_size), CL_KERNEL_GLOBAL_WORK_SIZE, true, OCL_SIZE, 0, sizeof(size_t[3])},
1844 #endif
1845 };
1846 
1847 ecl_info_t event_info[] = {
1848     { &ATOM(command_queue),  CL_EVENT_COMMAND_QUEUE, false, OCL_COMMAND_QUEUE, 0, 0 },
1849     { &ATOM(command_type),   CL_EVENT_COMMAND_TYPE, false,  OCL_ENUM, kv_command_type, 0 },
1850     { &ATOM(reference_count), CL_EVENT_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1851     { &ATOM(execution_status), CL_EVENT_COMMAND_EXECUTION_STATUS, false, OCL_ENUM, kv_execution_status, 0 }
1852 };
1853 
1854 // clGetKernelArgInfo 1.2
1855 #if CL_VERSION_1_2 == 1
1856 
1857 ecl_kv_t kv_address_qualifier[] = {
1858     { &ATOM(global), CL_KERNEL_ARG_ADDRESS_GLOBAL },
1859     { &ATOM(local),  CL_KERNEL_ARG_ADDRESS_LOCAL },
1860     { &ATOM(constant), CL_KERNEL_ARG_ADDRESS_CONSTANT },
1861     { &ATOM(private), CL_KERNEL_ARG_ADDRESS_PRIVATE },
1862     { 0, 0 }
1863 };
1864 
1865 ecl_kv_t kv_access_qualifier[] = {
1866     { &ATOM(read_only), CL_KERNEL_ARG_ACCESS_READ_ONLY },
1867     { &ATOM(write_only), CL_KERNEL_ARG_ACCESS_WRITE_ONLY },
1868     { &ATOM(read_write), CL_KERNEL_ARG_ACCESS_READ_WRITE },
1869     { &ATOM(none), CL_KERNEL_ARG_ACCESS_NONE },
1870     { 0, 0 }
1871 };
1872 
1873 ecl_kv_t kv_type_qualifier[] = {
1874     { &ATOM(none), CL_KERNEL_ARG_TYPE_NONE },
1875     { &ATOM(const), CL_KERNEL_ARG_TYPE_CONST },
1876     { &ATOM(restrict), CL_KERNEL_ARG_TYPE_RESTRICT },
1877     { &ATOM(volatile), CL_KERNEL_ARG_TYPE_VOLATILE },
1878     { 0, 0 }
1879 };
1880 
1881 ecl_info_t arg_info[] = {
1882     { &ATOM(address_qualifier), CL_KERNEL_ARG_ADDRESS_QUALIFIER, false, OCL_ENUM, kv_address_qualifier },
1883     { &ATOM(access_qualifier), CL_KERNEL_ARG_ACCESS_QUALIFIER, false, OCL_ENUM, kv_access_qualifier },
1884     { &ATOM(type_name), CL_KERNEL_ARG_TYPE_NAME, false, OCL_STRING, 0 },
1885     { &ATOM(type_qualifier), CL_KERNEL_ARG_TYPE_QUALIFIER, false, OCL_ENUM, kv_type_qualifier },
1886     { &ATOM(name),  CL_KERNEL_ARG_NAME, false, OCL_STRING, 0 },
1887 };
1888 #endif
1889 
1890 // Error reasons
ecl_error(cl_int err)1891 ERL_NIF_TERM ecl_error(cl_int err)
1892 {
1893     switch(err) {
1894     case CL_DEVICE_NOT_FOUND:
1895 	return ATOM(device_not_found);
1896     case CL_DEVICE_NOT_AVAILABLE:
1897 	return ATOM(device_not_available);
1898     case CL_COMPILER_NOT_AVAILABLE:
1899 	return ATOM(compiler_not_available);
1900     case CL_MEM_OBJECT_ALLOCATION_FAILURE:
1901 	return ATOM(mem_object_allocation_failure);
1902     case CL_OUT_OF_RESOURCES:
1903 	return ATOM(out_of_resources);
1904     case CL_OUT_OF_HOST_MEMORY:
1905 	return ATOM(out_of_host_memory);
1906     case CL_PROFILING_INFO_NOT_AVAILABLE:
1907 	return ATOM(profiling_info_not_available);
1908     case CL_MEM_COPY_OVERLAP:
1909 	return ATOM(mem_copy_overlap);
1910     case CL_IMAGE_FORMAT_MISMATCH:
1911 	return ATOM(image_format_mismatch);
1912     case CL_IMAGE_FORMAT_NOT_SUPPORTED:
1913 	return ATOM(image_format_not_supported);
1914     case CL_BUILD_PROGRAM_FAILURE:
1915 	return ATOM(build_program_failure);
1916     case CL_MAP_FAILURE:
1917 	return ATOM(map_failure);
1918     case CL_INVALID_VALUE:
1919 	return ATOM(invalid_value);
1920     case CL_INVALID_DEVICE_TYPE:
1921 	return ATOM(invalid_device_type);
1922     case CL_INVALID_PLATFORM:
1923 	return ATOM(invalid_platform);
1924     case CL_INVALID_DEVICE:
1925 	return ATOM(invalid_device);
1926     case CL_INVALID_CONTEXT:
1927 	return ATOM(invalid_context);
1928     case CL_INVALID_QUEUE_PROPERTIES:
1929 	return ATOM(invalid_queue_properties);
1930     case CL_INVALID_COMMAND_QUEUE:
1931 	return ATOM(invalid_command_queue);
1932     case CL_INVALID_HOST_PTR:
1933 	return ATOM(invalid_host_ptr);
1934     case CL_INVALID_MEM_OBJECT:
1935 	return ATOM(invalid_mem_object);
1936     case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
1937 	return ATOM(invalid_image_format_descriptor);
1938     case CL_INVALID_IMAGE_SIZE:
1939 	return ATOM(invalid_image_size);
1940     case CL_INVALID_SAMPLER:
1941 	return ATOM(invalid_sampler);
1942     case CL_INVALID_BINARY:
1943 	return ATOM(invalid_binary);
1944     case CL_INVALID_BUILD_OPTIONS:
1945 	return ATOM(invalid_build_options);
1946     case CL_INVALID_PROGRAM:
1947 	return ATOM(invalid_program);
1948     case CL_INVALID_PROGRAM_EXECUTABLE:
1949 	return ATOM(invalid_program_executable);
1950     case CL_INVALID_KERNEL_NAME:
1951 	return ATOM(invalid_kernel_name);
1952     case CL_INVALID_KERNEL_DEFINITION:
1953 	return ATOM(invalid_kernel_definition);
1954     case CL_INVALID_KERNEL:
1955 	return ATOM(invalid_kernel);
1956     case CL_INVALID_ARG_INDEX:
1957 	return ATOM(invalid_arg_index);
1958     case CL_INVALID_ARG_VALUE:
1959 	return ATOM(invalid_arg_value);
1960     case CL_INVALID_ARG_SIZE:
1961 	return ATOM(invalid_arg_size);
1962     case CL_INVALID_KERNEL_ARGS:
1963 	return ATOM(invalid_kernel_args);
1964     case CL_INVALID_WORK_DIMENSION:
1965 	return ATOM(invalid_work_dimension);
1966     case CL_INVALID_WORK_GROUP_SIZE:
1967 	return ATOM(invalid_work_group_size);
1968     case CL_INVALID_WORK_ITEM_SIZE:
1969 	return ATOM(invalid_work_item_size);
1970     case CL_INVALID_GLOBAL_OFFSET:
1971 	return ATOM(invalid_global_offset);
1972     case CL_INVALID_EVENT_WAIT_LIST:
1973 	return ATOM(invalid_event_wait_list);
1974     case CL_INVALID_EVENT:
1975 	return ATOM(invalid_event);
1976     case CL_INVALID_OPERATION:
1977 	return ATOM(invalid_operation);
1978     case CL_INVALID_GL_OBJECT:
1979 	return ATOM(invalid_gl_object);
1980     case CL_INVALID_BUFFER_SIZE:
1981 	return ATOM(invalid_buffer_size);
1982     case CL_INVALID_MIP_LEVEL:
1983 	return ATOM(invalid_mip_level);
1984     case CL_INVALID_GLOBAL_WORK_SIZE:
1985 	return ATOM(invalid_global_work_size);
1986     default:
1987 	return ATOM(unknown);
1988     }
1989 }
1990 
ecl_make_error(ErlNifEnv * env,cl_int err)1991 ERL_NIF_TERM ecl_make_error(ErlNifEnv* env, cl_int err)
1992 {
1993     return enif_make_tuple2(env, ATOM(error), ecl_error(err));
1994 }
1995 
ecl_emit_error(char * file,int line,...)1996 static void ecl_emit_error(char* file, int line, ...)
1997 {
1998     va_list ap;
1999     char* fmt;
2000 
2001     va_start(ap, line);
2002     fmt = va_arg(ap, char*);
2003 
2004     fprintf(stderr, "%s:%d: ", file, line);
2005     vfprintf(stderr, fmt, ap);
2006     fprintf(stderr, "\r\n");
2007     va_end(ap);
2008     fflush(stderr);
2009 }
2010 
2011 // Parse bool
get_bool(ErlNifEnv * env,const ERL_NIF_TERM key,cl_bool * val)2012 static int get_bool(ErlNifEnv* env, const ERL_NIF_TERM key, cl_bool* val)
2013 {
2014     UNUSED(env);
2015     if (key == ATOM(true)) {
2016 	*val = true;
2017 	return 1;
2018     }
2019     else if (key == ATOM(false)) {
2020 	*val = false;
2021 	return 1;
2022     }
2023     return 0;
2024 }
2025 
2026 
2027 // Parse enum
get_enum(ErlNifEnv * env,const ERL_NIF_TERM key,cl_uint * num,ecl_kv_t * kv)2028 static int get_enum(ErlNifEnv* env, const ERL_NIF_TERM key,
2029 		    cl_uint* num, ecl_kv_t* kv)
2030 {
2031     UNUSED(env);
2032 
2033     if (!enif_is_atom(env, key))
2034 	return 0;
2035     while(kv->key) {
2036 	if (*kv->key == key) {
2037 	    *num = (cl_uint) kv->value;
2038 	    return 1;
2039 	}
2040 	kv++;
2041     }
2042     return 0;
2043 }
2044 
2045 // Parse bitfield
get_bitfield(ErlNifEnv * env,const ERL_NIF_TERM key,cl_bitfield * field,ecl_kv_t * kv)2046 static int get_bitfield(ErlNifEnv* env, const ERL_NIF_TERM key,
2047 			cl_bitfield* field, ecl_kv_t* kv)
2048 {
2049     UNUSED(env);
2050 
2051     if (!enif_is_atom(env, key))
2052 	return 0;
2053     while(kv->key) {
2054 	if (*kv->key == key) {
2055 	    *field = kv->value;
2056 	    return 1;
2057 	}
2058 	kv++;
2059     }
2060     return 0;
2061 }
2062 
2063 
get_bitfields(ErlNifEnv * env,const ERL_NIF_TERM term,cl_bitfield * field,ecl_kv_t * kv)2064 static int get_bitfields(ErlNifEnv* env, const ERL_NIF_TERM term,
2065 			 cl_bitfield* field, ecl_kv_t* kv)
2066 {
2067     cl_bitfield t;
2068 
2069     if (enif_is_atom(env, term)) {
2070 	if (!get_bitfield(env, term, &t, kv))
2071 	    return 0;
2072 	*field = t;
2073 	return 1;
2074     }
2075     else if (enif_is_empty_list(env, term)) {
2076 	*field = 0;
2077 	return 1;
2078     }
2079     else if (enif_is_list(env, term)) {
2080 	cl_bitfield fs = 0;
2081 	ERL_NIF_TERM list = term;
2082 	ERL_NIF_TERM head, tail;
2083 
2084 	while(enif_get_list_cell(env, list, &head, &tail)) {
2085 	    if (!get_bitfield(env, head, &t, kv))
2086 		return 0;
2087 	    fs |= t;
2088 	    list = tail;
2089 	}
2090 	if (!enif_is_empty_list(env, list))
2091 	    return 0;
2092 	*field = fs;
2093 	return 1;
2094     }
2095     return 0;
2096 }
2097 
make_enum(ErlNifEnv * env,cl_uint num,ecl_kv_t * kv)2098 ERL_NIF_TERM make_enum(ErlNifEnv* env, cl_uint num, ecl_kv_t* kv)
2099 {
2100     while(kv->key) {
2101 	if (num == (cl_uint)kv->value)
2102 	    return *kv->key;
2103 	kv++;
2104     }
2105     return enif_make_uint(env, num);
2106 }
2107 
make_bitfields(ErlNifEnv * env,cl_bitfield v,ecl_kv_t * kv)2108 ERL_NIF_TERM make_bitfields(ErlNifEnv* env, cl_bitfield v, ecl_kv_t* kv)
2109 {
2110     ERL_NIF_TERM list = enif_make_list(env, 0);
2111 
2112     if (v) {
2113 	int n = 0;
2114 	while(kv->key) {
2115 	    kv++;
2116 	    n++;
2117 	}
2118 	while(n--) {
2119 	    kv--;
2120 	    if ((kv->value & v) == kv->value)
2121 		list = enif_make_list_cell(env, *kv->key, list);
2122 	}
2123     }
2124     return list;
2125 }
2126 
2127 
2128 
2129 /******************************************************************************
2130  *
2131  *   Linear hash functions
2132  *
2133  *****************************************************************************/
2134 
2135 #define EPTR_HANDLE(ptr) ((intptr_t)(ptr))
2136 
ref_hash(void * key)2137 static lhash_value_t ref_hash(void* key)
2138 {
2139     return (lhash_value_t) key;
2140 }
2141 
ref_cmp(void * key,void * data)2142 static int ref_cmp(void* key, void* data)
2143 {
2144     if (((intptr_t)key) == EPTR_HANDLE(((ecl_object_t*)data)->opaque))
2145 	return 0;
2146     return 1;
2147 }
2148 
ref_release(void * data)2149 static void ref_release(void *data)
2150 {
2151     UNUSED(data);
2152     // object's are free'd by garbage collection
2153 }
2154 
2155 // Remove object from hash
object_erase(ecl_object_t * obj)2156 static void object_erase(ecl_object_t* obj)
2157 {
2158     ecl_env_t* ecl = obj->env;
2159     enif_rwlock_rwlock(ecl->ref_lock);
2160     lhash_erase(&ecl->ref, (void*)EPTR_HANDLE(obj->opaque));
2161     enif_rwlock_rwunlock(ecl->ref_lock);
2162 }
2163 
2164 /******************************************************************************
2165  *
2166  *   Message queue
2167  *
2168  *****************************************************************************/
2169 
2170 // Peek at queue front
2171 #if 0
2172 static ecl_message_t* ecl_queue_peek(ecl_queue_t* q)
2173 {
2174     ecl_qlink_t* ql;
2175 
2176     enif_mutex_lock(q->mtx);
2177     ql = q->front;
2178     enif_mutex_unlock(q->mtx);
2179     if (ql)
2180 	return &ql->mesg;
2181     else
2182 	return 0;
2183 }
2184 #endif
2185 
2186 // Get message from queue front
ecl_queue_get(ecl_queue_t * q,ecl_message_t * m)2187 static int ecl_queue_get(ecl_queue_t* q, ecl_message_t* m)
2188 {
2189     ecl_qlink_t* ql;
2190 
2191     enif_mutex_lock(q->mtx);
2192     while(!(ql = q->front)) {
2193 	enif_cond_wait(q->cv, q->mtx);
2194     }
2195     if (!(q->front = ql->next))
2196 	q->rear = 0;
2197     q->len--;
2198 
2199     *m = ql->mesg;
2200 
2201     if ((ql >= &q->ql[0]) && (ql <= &q->ql[MAX_QLINK-1])) {
2202 	ql->next = q->free;
2203 	q->free = ql;
2204     }
2205     else
2206 	enif_free(ql);
2207     enif_mutex_unlock(q->mtx);
2208     return 0;
2209 }
2210 
2211 // Put message at queue rear
ecl_queue_put(ecl_queue_t * q,ecl_message_t * m)2212 static int ecl_queue_put(ecl_queue_t* q, ecl_message_t* m)
2213 {
2214     ecl_qlink_t* ql;
2215     ecl_qlink_t* qr;
2216     int res = 0;
2217 
2218     enif_mutex_lock(q->mtx);
2219 
2220     if ((ql = q->free))
2221 	q->free = ql->next;
2222     else
2223 	ql = enif_alloc(sizeof(ecl_qlink_t));
2224     if (!ql)
2225 	res = -1;
2226     else {
2227 	ql->mesg = *m;
2228 	q->len++;
2229 	ql->next = 0;
2230 	if (!(qr = q->rear)) {
2231 	    q->front = ql;
2232 	    enif_cond_signal(q->cv);
2233 	}
2234 	else
2235 	    qr->next = ql;
2236 	q->rear = ql;
2237     }
2238     enif_mutex_unlock(q->mtx);
2239     return res;
2240 }
2241 
ecl_queue_init(ecl_queue_t * q)2242 static int ecl_queue_init(ecl_queue_t* q)
2243 {
2244     int i;
2245     if (!(q->cv     = enif_cond_create("queue_cv")))
2246 	return -1;
2247     if (!(q->mtx    = enif_mutex_create("queue_mtx")))
2248 	return -1;
2249     q->front  = 0;
2250     q->rear   = 0;
2251     q->len    = 0;
2252     for (i = 0; i < MAX_QLINK-1; i++)
2253 	q->ql[i].next = &q->ql[i+1];
2254     q->ql[MAX_QLINK-1].next = 0;
2255     q->free = &q->ql[0];
2256     return 0;
2257 }
2258 
ecl_queue_destroy(ecl_queue_t * q)2259 static void ecl_queue_destroy(ecl_queue_t* q)
2260 {
2261     ecl_qlink_t* ql;
2262 
2263     enif_cond_destroy(q->cv);
2264     enif_mutex_destroy(q->mtx);
2265 
2266     ql = q->front;
2267     while(ql) {
2268 	ecl_qlink_t* qln = ql->next;
2269 	if ((ql >= &q->ql[0]) && (ql <= &q->ql[MAX_QLINK-1]))
2270 	    ;
2271 	else
2272 	    enif_free(ql);
2273 	ql = qln;
2274     }
2275 }
2276 
2277 /******************************************************************************
2278  *
2279  *   Threads
2280  *
2281  *****************************************************************************/
2282 
ecl_message_send(ecl_thread_t * thr,ecl_message_t * m)2283 static int ecl_message_send(ecl_thread_t* thr, ecl_message_t* m)
2284 {
2285     return ecl_queue_put(&thr->q, m);
2286 }
2287 
ecl_message_recv(ecl_thread_t * thr,ecl_message_t * m)2288 static int ecl_message_recv(ecl_thread_t* thr, ecl_message_t* m)
2289 {
2290     int r;
2291     if ((r = ecl_queue_get(&thr->q, m)) < 0)
2292 	return r;
2293     return 0;
2294 }
2295 
2296 #if 0
2297 static ecl_message_t* ecl_message_peek(ecl_thread_t* thr, ecl_thread_t** from)
2298 {
2299     ecl_message_t* m;
2300     if ((m = ecl_queue_peek(&thr->q))) {
2301 	if (from)
2302 	    *from = m->sender;
2303     }
2304     return m;
2305 }
2306 #endif
2307 
ecl_thread_start(void * (* func)(void * arg),void * arg,int stack_size)2308 static ecl_thread_t* ecl_thread_start(void* (*func)(void* arg),
2309 				      void* arg, int stack_size)
2310 {
2311     ErlNifThreadOpts* opts;
2312     ecl_thread_t* thr;
2313 
2314     if (!(thr = enif_alloc(sizeof(ecl_thread_t))))
2315 	return 0;
2316     if (ecl_queue_init(&thr->q) < 0)
2317 	goto error;
2318     if (!(opts = enif_thread_opts_create("ecl_thread_opts")))
2319 	goto error;
2320     opts->suggested_stack_size = stack_size;
2321     thr->arg = arg;
2322 
2323     enif_thread_create("ecl_thread", &thr->tid, func, thr, opts);
2324     enif_thread_opts_destroy(opts);
2325     return thr;
2326 error:
2327     enif_free(thr);
2328     return 0;
2329 }
2330 
ecl_thread_stop(ecl_thread_t * thr,void ** exit_value)2331 static int ecl_thread_stop(ecl_thread_t* thr, void** exit_value)
2332 {
2333     ecl_message_t m;
2334 
2335     m.type   = ECL_MESSAGE_STOP;
2336     m.env    = 0;
2337     ecl_message_send(thr, &m);
2338     enif_thread_join(thr->tid, exit_value);
2339     ecl_queue_destroy(&thr->q);
2340     enif_free(thr);
2341     return 0;
2342 }
2343 
ecl_thread_exit(void * value)2344 static void ecl_thread_exit(void* value)
2345 {
2346     enif_thread_exit(value);
2347 }
2348 
2349 /******************************************************************************
2350  *
2351  *   Ecl resource
2352  *
2353  *****************************************************************************/
2354 
ecl_resource_init(ErlNifEnv * env,ecl_resource_t * res,char * name,size_t size,void (* dtor)(ErlNifEnv *,ecl_object_t *),ErlNifResourceFlags flags,ErlNifResourceFlags * tried)2355 static int ecl_resource_init(ErlNifEnv* env,
2356 			     ecl_resource_t* res,
2357 			     char* name,
2358 			     size_t size,  // object size
2359 			     void (*dtor)(ErlNifEnv*, ecl_object_t*),
2360 			     ErlNifResourceFlags flags,
2361 			     ErlNifResourceFlags* tried)
2362 {
2363     res->name = name;
2364     res->type = enif_make_atom(env, name);
2365     res->size = size;
2366     res->res  = enif_open_resource_type(env, 0, name,
2367 					(ErlNifResourceDtor*) dtor,
2368 					flags, tried);
2369     return 0;
2370 }
2371 
2372 //
2373 // Reference new kernel argument and Dereference old value
2374 //
2375 
unref_kernel_arg(int type,void * val)2376 static void unref_kernel_arg(int type, void* val)
2377 {
2378     switch(type) {
2379     case KERNEL_ARG_MEM:
2380 	if (val)
2381 	    clReleaseMemObject((cl_mem) val);
2382 	break;
2383     case KERNEL_ARG_SAMPLER:
2384 	if (val)
2385 	    clReleaseSampler((cl_sampler) val);
2386 	break;
2387     case KERNEL_ARG_OTHER:
2388     default:
2389 	break;
2390     }
2391 }
2392 
ref_kernel_arg(int type,void * val)2393 static void ref_kernel_arg(int type, void* val)
2394 {
2395     switch(type) {
2396     case KERNEL_ARG_MEM:
2397 	if (val)
2398 	    clRetainMemObject((cl_mem) val);
2399 	break;
2400     case KERNEL_ARG_SAMPLER:
2401 	if (val)
2402 	    clRetainSampler((cl_sampler) val);
2403 	break;
2404     case KERNEL_ARG_OTHER:
2405     default:
2406 	break;
2407     }
2408 }
2409 
set_kernel_arg(ecl_kernel_t * kern,cl_uint i,int type,void * value)2410 static int set_kernel_arg(ecl_kernel_t* kern, cl_uint i, int type, void* value)
2411 {
2412     if (i < kern->num_args) {
2413 	int   old_type  = kern->arg[i].type;
2414 	void* old_value = kern->arg[i].value;
2415 	ref_kernel_arg(type, value);
2416 	kern->arg[i].type  = type;
2417 	kern->arg[i].value = value;
2418 	unref_kernel_arg(old_type, old_value);
2419 	return 0;
2420     }
2421     return -1;
2422 }
2423 
2424 /******************************************************************************
2425  *
2426  *   Resource destructors
2427  *
2428  *****************************************************************************/
2429 
ecl_platform_dtor(ErlNifEnv * env,ecl_object_t * obj)2430 static void ecl_platform_dtor(ErlNifEnv* env, ecl_object_t* obj)
2431 {
2432     UNUSED(env);
2433     UNUSED(obj);
2434     DBG("ecl_platform_dtor: %p", obj);
2435     object_erase(obj);
2436     if (obj->parent) enif_release_resource(obj->parent);
2437 }
2438 
ecl_device_dtor(ErlNifEnv * env,ecl_object_t * obj)2439 static void ecl_device_dtor(ErlNifEnv* env, ecl_object_t* obj)
2440 {
2441     UNUSED(env);
2442     UNUSED(obj);
2443     DBG("ecl_device_dtor: %p", obj);
2444     object_erase(obj);
2445     if (obj->parent) enif_release_resource(obj->parent);
2446 }
2447 
ecl_queue_dtor(ErlNifEnv * env,ecl_object_t * obj)2448 static void ecl_queue_dtor(ErlNifEnv* env, ecl_object_t* obj)
2449 {
2450     UNUSED(env);
2451     DBG("ecl_queue_dtor: %p", obj);
2452     clReleaseCommandQueue(obj->queue);
2453     object_erase(obj);
2454     if (obj->parent) enif_release_resource(obj->parent);
2455 }
2456 
ecl_mem_dtor(ErlNifEnv * env,ecl_object_t * obj)2457 static void ecl_mem_dtor(ErlNifEnv* env, ecl_object_t* obj)
2458 {
2459     UNUSED(env);
2460     DBG("ecl_mem_dtor: %p", obj);
2461     clReleaseMemObject(obj->mem);
2462     object_erase(obj);
2463     if (obj->parent) enif_release_resource(obj->parent);
2464 }
2465 
ecl_sampler_dtor(ErlNifEnv * env,ecl_object_t * obj)2466 static void ecl_sampler_dtor(ErlNifEnv* env, ecl_object_t* obj)
2467 {
2468     UNUSED(env);
2469     DBG("ecl_sampler_dtor: %p", obj);
2470     clReleaseSampler(obj->sampler);
2471     object_erase(obj);
2472     if (obj->parent) enif_release_resource(obj->parent);
2473 }
2474 
ecl_program_dtor(ErlNifEnv * env,ecl_object_t * obj)2475 static void ecl_program_dtor(ErlNifEnv* env, ecl_object_t* obj)
2476 {
2477     UNUSED(env);
2478     DBG("ecl_program_dtor: %p", obj);
2479     clReleaseProgram(obj->program);
2480     object_erase(obj);
2481     if (obj->parent) enif_release_resource(obj->parent);
2482 }
2483 
ecl_kernel_dtor(ErlNifEnv * env,ecl_object_t * obj)2484 static void ecl_kernel_dtor(ErlNifEnv* env, ecl_object_t* obj)
2485 {
2486     ecl_kernel_t* kern = (ecl_kernel_t*) obj;
2487     cl_uint i;
2488     UNUSED(env);
2489     DBG("ecl_kernel_dtor: %p", kern);
2490     for (i = 0; i < kern->num_args; i++)
2491 	unref_kernel_arg(kern->arg[i].type, kern->arg[i].value);
2492     enif_free(kern->arg);
2493     clReleaseKernel(kern->obj.kernel);
2494     object_erase(obj);
2495     if (obj->parent) enif_release_resource(obj->parent);
2496 }
2497 
ecl_event_dtor(ErlNifEnv * env,ecl_object_t * obj)2498 static void ecl_event_dtor(ErlNifEnv* env, ecl_object_t* obj)
2499 {
2500     ecl_event_t* evt = (ecl_event_t*) obj;
2501     UNUSED(env);
2502     DBG("ecl_event_dtor: %p", evt);
2503     clReleaseEvent(evt->obj.event);
2504     object_erase(obj);
2505     if (evt->bin) {
2506 	if (!evt->rl)
2507 	    enif_release_binary(evt->bin);
2508 	enif_free(evt->bin);
2509     }
2510     if (evt->bin_env)
2511 	enif_free_env(evt->bin_env);
2512     if (obj->parent) enif_release_resource(obj->parent);
2513 }
2514 
ecl_context_dtor(ErlNifEnv * env,ecl_object_t * obj)2515 static void ecl_context_dtor(ErlNifEnv* env, ecl_object_t* obj)
2516 {
2517     void* exit_value;
2518     ecl_context_t* ctx = (ecl_context_t*) obj;
2519     ecl_context_t** pp;
2520     ecl_env_t* ecl = enif_priv_data(env);
2521     ecl_thread_t* thr = ctx->thr;
2522 
2523     DBG("ecl_context_dtor: %p", ctx);
2524 
2525     enif_rwlock_rwlock(ecl->context_list_lock);
2526     pp = &ecl->context_list;
2527     while(*pp != ctx)
2528 	pp = &(*pp)->next;
2529     *pp = ctx->next;
2530     enif_rwlock_rwunlock(ecl->context_list_lock);
2531 
2532     clReleaseContext(ctx->obj.context);
2533     object_erase(obj);
2534     // parent is always = 0
2535     // kill the event thread
2536     ecl_thread_stop(thr, &exit_value);
2537 }
2538 
2539 
2540 /******************************************************************************
2541  *
2542  *   make/get
2543  *
2544  *****************************************************************************/
2545 
2546 // For now, wrap the resource object {type,pointer-val,handle}
make_object(ErlNifEnv * env,const ERL_NIF_TERM type,void * robject)2547 static ERL_NIF_TERM make_object(ErlNifEnv* env, const ERL_NIF_TERM type,
2548 				void* robject)
2549 {
2550     if (!robject)
2551 	return ATOM(undefined);
2552     else
2553 	return enif_make_tuple3(env,
2554 				type,
2555 				ecl_make_sizet(env, (size_t) robject),
2556 				enif_make_resource(env, robject));
2557 }
2558 
2559 // Accept {type,pointer-val,handle}
get_ecl_object(ErlNifEnv * env,const ERL_NIF_TERM term,ecl_resource_t * rtype,bool nullp,ecl_object_t ** robjectp)2560 static int get_ecl_object(ErlNifEnv* env, const ERL_NIF_TERM term,
2561 			  ecl_resource_t* rtype, bool nullp,
2562 			  ecl_object_t** robjectp)
2563 {
2564     const ERL_NIF_TERM* elem;
2565     int arity;
2566     size_t handle;  // not really a size_t but the type has a good size
2567 
2568     if (nullp && (term == ATOM(undefined))) {
2569 	*robjectp = 0;
2570 	return 1;
2571     }
2572     if (!enif_get_tuple(env, term, &arity, &elem))
2573 	return 0;
2574     if (arity != 3)
2575 	return 0;
2576     if (!enif_is_atom(env, elem[0]) || (elem[0] != rtype->type))
2577 	return 0;
2578     if (!ecl_get_sizet(env, elem[1], &handle))
2579 	return 0;
2580     if (!enif_get_resource(env, elem[2], rtype->res, (void**) robjectp))
2581 	return 0;
2582     if ((size_t)*robjectp != handle)
2583 	return 0;
2584     return 1;
2585 }
2586 
2587 #if 0
2588 static int get_ecl_object_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2589 			       ecl_resource_t* rtype, bool nullp,
2590 			       ecl_object_t** robjv, size_t* rlen)
2591 {
2592     size_t maxlen = *rlen;
2593     size_t n = 0;
2594     ERL_NIF_TERM list = term;
2595 
2596     while(n < maxlen) {
2597 	ERL_NIF_TERM head, tail;
2598 
2599 	if (enif_get_list_cell(env, list, &head, &tail)) {
2600 	    if (!get_ecl_object(env, head, rtype, nullp, robjv))
2601 		return 0;
2602 	    n++;
2603 	    robjv++;
2604 	    list = tail;
2605 	}
2606 	else if (enif_is_empty_list(env, list)) {
2607 	    *rlen = n;
2608 	    return 1;
2609 	}
2610 	else
2611 	    return 0;
2612     }
2613     return 0;
2614 }
2615 #endif
2616 
get_object(ErlNifEnv * env,const ERL_NIF_TERM term,ecl_resource_t * rtype,bool nullp,void ** rptr)2617 static int get_object(ErlNifEnv* env, const ERL_NIF_TERM term,
2618 		      ecl_resource_t* rtype, bool nullp,
2619 		      void** rptr)
2620 {
2621     ecl_object_t* obj;
2622     if (get_ecl_object(env, term, rtype, nullp, &obj)) {
2623 	*rptr = obj ? obj->opaque : 0;
2624 	return 1;
2625     }
2626     return 0;
2627 }
2628 
get_object_list(ErlNifEnv * env,const ERL_NIF_TERM term,ecl_resource_t * rtype,bool nullp,void ** robjv,cl_uint * rlen)2629 static int get_object_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2630 			   ecl_resource_t* rtype, bool nullp,
2631 			   void** robjv, cl_uint* rlen)
2632 {
2633     cl_uint maxlen = *rlen;
2634     cl_uint n = 0;
2635     ERL_NIF_TERM list = term;
2636 
2637     while(n < maxlen) {
2638 	ERL_NIF_TERM head, tail;
2639 
2640 	if (enif_get_list_cell(env, list, &head, &tail)) {
2641 	    if (!get_object(env, head, rtype, nullp, robjv))
2642 		return 0;
2643 	    n++;
2644 	    robjv++;
2645 	    list = tail;
2646 	}
2647 	else if (enif_is_empty_list(env, list)) {
2648 	    *rlen = n;
2649 	    return 1;
2650 	}
2651 	else
2652 	    return 0;
2653     }
2654     return 0;
2655 }
2656 
2657 
2658 
get_sizet_list(ErlNifEnv * env,const ERL_NIF_TERM term,size_t * rvec,size_t * rlen)2659 static int get_sizet_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2660 			  size_t* rvec, size_t* rlen)
2661 {
2662     size_t maxlen = *rlen;
2663     size_t n = 0;
2664     ERL_NIF_TERM list = term;
2665 
2666     while(n < maxlen) {
2667 	ERL_NIF_TERM head, tail;
2668 
2669 	if (enif_get_list_cell(env, list, &head, &tail)) {
2670 	    if (!ecl_get_sizet(env, head, rvec))
2671 		return 0;
2672 	    n++;
2673 	    rvec++;
2674 	    list = tail;
2675 	}
2676 	else if (enif_is_empty_list(env, list)) {
2677 	    *rlen = n;
2678 	    return 1;
2679 	}
2680 	else
2681 	    return 0;
2682     }
2683     if (enif_is_empty_list(env, list)) {
2684 	*rlen = n;
2685 	return 1;
2686     }
2687     return 0;
2688 }
2689 
get_binary_list(ErlNifEnv * env,const ERL_NIF_TERM term,ErlNifBinary * rvec,size_t * rlen)2690 static int get_binary_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2691 			   ErlNifBinary* rvec, size_t* rlen)
2692 {
2693     size_t maxlen = *rlen;
2694     size_t n = 0;
2695     ERL_NIF_TERM list = term;
2696 
2697     while(n < maxlen) {
2698 	ERL_NIF_TERM head, tail;
2699 
2700 	if (enif_get_list_cell(env, list, &head, &tail)) {
2701 	    if (!enif_inspect_binary(env, head, rvec))
2702 		return 0;
2703 	    n++;
2704 	    rvec++;
2705 	    list = tail;
2706 	}
2707 	else if (enif_is_empty_list(env, list)) {
2708 	    *rlen = n;
2709 	    return 1;
2710 	}
2711 	else
2712 	    return 0;
2713     }
2714     return 0;
2715 }
2716 
2717 #if CL_VERSION_1_2 == 1
2718 // avoid warning
2719 // currently onlt used my compile_program which is a 1.2 function
2720 
2721 // free an array of strings
free_string_list(char ** rvec,size_t n)2722 static void free_string_list(char** rvec, size_t n)
2723 {
2724     int i;
2725     for (i = 0; i < (int)n; i++)
2726 	enif_free(rvec[i]);
2727 }
2728 
2729 // get a list of, max *rlen, dynamically allocated, strings.
get_string_list(ErlNifEnv * env,const ERL_NIF_TERM term,char ** rvec,size_t * rlen)2730 static int get_string_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2731 			   char** rvec, size_t* rlen)
2732 {
2733     char** rvec0 = rvec;
2734     size_t maxlen = *rlen;
2735     size_t n = 0;
2736     ERL_NIF_TERM list = term;
2737     ERL_NIF_TERM head, tail;
2738     while((n < maxlen) &&
2739 	  enif_get_list_cell(env, list, &head, &tail)) {
2740 	char* str;
2741 	unsigned int len;
2742 	if (!enif_get_list_length(env, head, &len))
2743 	    goto error;
2744 	if (!(str = enif_alloc(len+1)))
2745 	    goto error;
2746 	if (!enif_get_string(env, head, str, len+1, ERL_NIF_LATIN1))
2747 	    goto error;
2748 	*rvec++ = str;
2749 	n++;
2750 	list = tail;
2751     }
2752     if (enif_is_empty_list(env, list)) {
2753 	*rlen = n;
2754 	return 1;
2755     }
2756 error:
2757     free_string_list(rvec0, rvec-rvec0);
2758     return 0;
2759 }
2760 #endif
2761 
2762 // Copy a "local" binary to a new process independent environment
2763 // fill the binary structure with the new data and return it.
2764 //
ecl_make_binary(ErlNifEnv * src_env,const ERL_NIF_TERM src,ErlNifEnv * dst_env,ErlNifBinary * bin)2765 static int ecl_make_binary(ErlNifEnv* src_env,
2766 			   const ERL_NIF_TERM src,
2767 			   ErlNifEnv* dst_env,
2768 			   ErlNifBinary* bin)
2769 {
2770     ERL_NIF_TERM ref_counted;
2771 
2772     if (enif_is_binary(src_env, src)) {
2773 	// Update refc (and/or fix heap binaries)
2774 	ref_counted = enif_make_copy(dst_env, src);
2775 	return enif_inspect_binary(dst_env, ref_counted, bin);
2776     } else {
2777 	//  iolist to binary
2778 	if (!enif_inspect_iolist_as_binary(src_env, src, bin))
2779 	    return 0;
2780 	// ref count binary ?
2781 	enif_make_binary(dst_env, bin);
2782 	return 1;
2783     }
2784 }
2785 
2786 
2787 // Lookup a openCL object (native => reource ecl_object_t*)
ecl_lookup(ErlNifEnv * env,void * ptr)2788 static ecl_object_t* ecl_lookup(ErlNifEnv* env, void* ptr)
2789 {
2790     if (!ptr)
2791 	return 0;
2792     else {
2793 	ecl_env_t* ecl = enif_priv_data(env);
2794 	ecl_object_t* obj;
2795 
2796 	enif_rwlock_rlock(ecl->ref_lock);
2797 	obj = (ecl_object_t*) lhash_lookup(&ecl->ref,(void*)EPTR_HANDLE(ptr));
2798 	enif_rwlock_runlock(ecl->ref_lock);
2799 	return obj;
2800     }
2801 }
2802 
2803 // Create a new openCL resource object
ecl_new(ErlNifEnv * env,ecl_resource_t * rtype,void * ptr,ecl_object_t * parent,cl_int version)2804 static ecl_object_t* ecl_new(ErlNifEnv* env, ecl_resource_t* rtype,
2805 			     void* ptr, ecl_object_t* parent, cl_int version)
2806 {
2807     if (!ptr)
2808 	return 0;
2809     else {
2810 	ecl_env_t* ecl = enif_priv_data(env);
2811 	ecl_object_t* obj;
2812 
2813 	obj = enif_alloc_resource(rtype->res, rtype->size);
2814 	if (obj) {
2815 	    if (parent)	enif_keep_resource(parent);
2816 	    obj->opaque = ptr;
2817 	    obj->env    = ecl;
2818 	    obj->parent = parent;
2819 	    if(version == -1) {
2820 		version = parent ? parent->version : ecl->icd_version;
2821 	    }
2822 	    obj->version = (version < ecl->icd_version) ? version : ecl->icd_version;
2823 	    enif_rwlock_rwlock(ecl->ref_lock);
2824 	    lhash_insert_new(&ecl->ref, (void*)EPTR_HANDLE(ptr), obj);
2825 	    enif_rwlock_rwunlock(ecl->ref_lock);
2826 	}
2827 	return obj;
2828     }
2829 }
2830 
ecl_make_object(ErlNifEnv * env,ecl_resource_t * rtype,void * ptr,ecl_object_t * parent)2831 static ERL_NIF_TERM ecl_make_object(ErlNifEnv* env, ecl_resource_t* rtype,
2832 				    void* ptr, ecl_object_t* parent)
2833 {
2834     ecl_object_t* obj = ecl_new(env,rtype,ptr,parent,-1);
2835     ERL_NIF_TERM  res;
2836     res = make_object(env, rtype->type, obj);
2837     if (obj)
2838 	enif_release_resource(obj);
2839     return res;
2840 }
2841 
2842 
2843 // lookup or create a new ecl_object_t resource
ecl_maybe_new(ErlNifEnv * env,ecl_resource_t * rtype,void * ptr,ecl_object_t * parent,bool * is_new)2844 static ecl_object_t* ecl_maybe_new(ErlNifEnv* env, ecl_resource_t* rtype,
2845 				   void* ptr, ecl_object_t* parent,
2846 				   bool* is_new)
2847 {
2848     ecl_object_t* obj = ecl_lookup(env, ptr);
2849     if (!obj) {
2850 	obj = ecl_new(env, rtype, ptr, parent,-1);
2851 	*is_new = true;
2852     }
2853     else
2854 	*is_new = false;
2855     return obj;
2856 }
2857 
2858 
2859 // lookup or create resource object, return as erlang term
ecl_lookup_object(ErlNifEnv * env,ecl_resource_t * rtype,void * ptr,ecl_object_t * parent)2860 static ERL_NIF_TERM ecl_lookup_object(ErlNifEnv* env, ecl_resource_t* rtype,
2861 				      void* ptr, ecl_object_t* parent)
2862 {
2863     bool is_new;
2864     ERL_NIF_TERM  res;
2865     ecl_object_t* obj = ecl_maybe_new(env,rtype,ptr,parent,&is_new);
2866 
2867     res = make_object(env, rtype->type, obj);
2868     if (obj && is_new)
2869 	enif_release_resource(obj);
2870     return res;
2871 }
2872 
ecl_make_kernel(ErlNifEnv * env,cl_kernel kernel,ecl_object_t * parent)2873 static ERL_NIF_TERM ecl_make_kernel(ErlNifEnv* env, cl_kernel kernel,
2874 				    ecl_object_t* parent)
2875 {
2876     ecl_kernel_t* kern = (ecl_kernel_t*) ecl_new(env,&kernel_r,
2877 						 (void*)kernel,parent,-1);
2878     ERL_NIF_TERM  res;
2879     cl_uint num_args;
2880     size_t sz;
2881 
2882     // Get number of arguments, FIXME: check error return
2883     clGetKernelInfo(kernel,CL_KERNEL_NUM_ARGS,sizeof(num_args),&num_args,0);
2884     sz = num_args*sizeof(ecl_kernel_arg_t);
2885 
2886     kern->arg = (ecl_kernel_arg_t*) enif_alloc(sz);
2887     memset(kern->arg, 0, sz);
2888     kern->num_args = num_args;
2889 
2890     res = make_object(env, kernel_r.type, kern);
2891     if (kern)
2892 	enif_release_resource(kern);
2893     return res;
2894 }
2895 
ecl_make_event(ErlNifEnv * env,cl_event event,bool rd,bool rl,ErlNifEnv * bin_env,ErlNifBinary * bin,ecl_object_t * parent)2896 static ERL_NIF_TERM ecl_make_event(ErlNifEnv* env, cl_event event,
2897 				   bool rd, bool rl,
2898 				   ErlNifEnv* bin_env,
2899 				   ErlNifBinary* bin,
2900 				   ecl_object_t* parent)
2901 {
2902     ecl_event_t* evt = (ecl_event_t*) ecl_new(env,&event_r,
2903 					      (void*)event,parent,-1);
2904     ERL_NIF_TERM res;
2905     evt->bin_env = bin_env;
2906     evt->bin = bin;
2907     evt->rd  = rd;
2908     evt->rl  = rl;
2909     res = make_object(env, event_r.type, (ecl_object_t*) evt);
2910     if (evt)
2911 	enif_release_resource(evt);
2912     return res;
2913 }
2914 
ecl_make_context(ErlNifEnv * env,cl_context context,cl_int version)2915 static ERL_NIF_TERM ecl_make_context(ErlNifEnv* env, cl_context context, cl_int version)
2916 {
2917     ERL_NIF_TERM  res;
2918     ecl_env_t* ecl;
2919     ecl_context_t* ctx = (ecl_context_t*) ecl_new(env,&context_r,
2920 						  (void*)context,0,version);
2921     ecl = ctx->obj.env;
2922     ctx->upgrade_count = 0;  // first incarnation
2923     ctx->thr = ecl_thread_start(ecl_context_main, ctx, 8); // 8K stack!
2924     res = make_object(env, context_r.type, (ecl_object_t*) ctx);
2925     enif_rwlock_rwlock(ecl->context_list_lock);
2926     // link contexts for upgrade
2927     ctx->next = ecl->context_list;
2928     ecl->context_list = ctx;
2929     enif_rwlock_rwunlock(ecl->context_list_lock);
2930 
2931     if (ctx)
2932 	enif_release_resource(ctx);
2933     return res;
2934 }
2935 
2936 
2937 typedef cl_int CL_API_CALL info_fn_t(void* ptr, cl_uint param_name,
2938 				     size_t param_value_size,
2939 				     void* param_value, size_t* param_value_size_ret);
2940 typedef cl_int CL_API_CALL info2_fn_t(void* ptr1, void* ptr2, cl_uint param_name,
2941 				      size_t param_value_size,
2942 				      void* param_value, size_t* param_value_size_ret);
2943 
2944 // return size of type
ecl_sizeof(ocl_type_t type)2945 static size_t ecl_sizeof(ocl_type_t type)
2946 {
2947     switch(type) {
2948     case OCL_CHAR: return sizeof(cl_char);
2949     case OCL_UCHAR: return sizeof(cl_uchar);
2950     case OCL_SHORT: return sizeof(cl_short);
2951     case OCL_USHORT: return sizeof(cl_ushort);
2952     case OCL_INT: return sizeof(cl_int);
2953     case OCL_UINT: return sizeof(cl_uint);
2954     case OCL_LONG: return sizeof(cl_long);
2955     case OCL_ULONG: return sizeof(cl_ulong);
2956     case OCL_HALF: return sizeof(cl_half);
2957     case OCL_FLOAT: return sizeof(cl_float);
2958     case OCL_DOUBLE: return sizeof(cl_double);
2959     case OCL_BOOL: return sizeof(cl_bool);
2960     case OCL_STRING: return sizeof(cl_char*);
2961     case OCL_ENUM: return sizeof(cl_int);
2962     case OCL_BITFIELD: return sizeof(cl_bitfield);
2963     case OCL_POINTER: return sizeof(void*);
2964     case OCL_SIZE: return sizeof(size_t);
2965     case OCL_PLATFORM: return sizeof(void*);
2966     case OCL_DEVICE: return sizeof(void*);
2967     case OCL_CONTEXT: return sizeof(void*);
2968     case OCL_PROGRAM: return sizeof(void*);
2969     case OCL_COMMAND_QUEUE: return sizeof(void*);
2970     case OCL_IMAGE_FORMAT: return sizeof(cl_image_format);
2971 #if CL_VERSION_1_2 == 1
2972     case OCL_DEVICE_PARTITION: return sizeof(cl_device_partition_property);
2973 #endif
2974     case OCL_NUM_TYPES:
2975     default:
2976 	DBG("info_size: unknown type %d detected", type);
2977 	return sizeof(cl_int);
2978     }
2979 }
2980 
2981 // put basic value types
make_info_element(ErlNifEnv * env,ocl_type_t type,void * ptr,ecl_kv_t * kv)2982 static ERL_NIF_TERM make_info_element(ErlNifEnv* env, ocl_type_t type, void* ptr, ecl_kv_t* kv)
2983 {
2984     switch(type) {
2985     case OCL_CHAR:  return enif_make_int(env, *((cl_char*)ptr));
2986     case OCL_SHORT: return enif_make_int(env, *((cl_short*)ptr));
2987     case OCL_INT: return enif_make_int(env, *((cl_int*)ptr));
2988     case OCL_LONG: return enif_make_int64(env, *((cl_long*)ptr));
2989     case OCL_UCHAR:  return enif_make_uint(env, *((cl_uchar*)ptr));
2990     case OCL_USHORT: return enif_make_uint(env, *((cl_ushort*)ptr));
2991     case OCL_UINT: return enif_make_uint(env, *((cl_uint*)ptr));
2992     case OCL_HALF: return enif_make_uint(env, *((cl_half*)ptr));
2993     case OCL_ULONG: return enif_make_uint64(env, *((cl_ulong*)ptr));
2994     case OCL_SIZE: return ecl_make_sizet(env, *((size_t*)ptr));
2995     case OCL_FLOAT: return enif_make_double(env, *((cl_float*)ptr));
2996     case OCL_DOUBLE: return enif_make_double(env, *((cl_double*)ptr));
2997     case OCL_BOOL: return (*((cl_bool*)ptr)) ? ATOM(true) : ATOM(false);
2998     // case POINTER: cbuf_put_pointer(data, *((pointer_t*)ptr)); break;
2999     case OCL_STRING:
3000 	return enif_make_string_len(env, (char*) ptr, strlen((char*) ptr), ERL_NIF_LATIN1);
3001 
3002     case OCL_BITFIELD:
3003 	return make_bitfields(env, *((cl_bitfield*)ptr), kv);
3004 
3005     case OCL_ENUM:
3006 	return make_enum(env, *((cl_int*)ptr), kv);
3007 
3008     case OCL_POINTER:
3009 	return ecl_make_sizet(env, *((intptr_t*)ptr));
3010 
3011     case OCL_PLATFORM:
3012 	return ecl_lookup_object(env,&platform_r,*(void**)ptr,0);
3013 
3014     case OCL_DEVICE:
3015 	return ecl_lookup_object(env,&device_r,*(void**)ptr,0);
3016 
3017     case OCL_CONTEXT:
3018 	return ecl_lookup_object(env,&context_r,*(void**)ptr,0);
3019 
3020     case OCL_PROGRAM:
3021 	// FIXME: find context object, pass as parent
3022 	return ecl_lookup_object(env,&program_r,*(void**)ptr,0);
3023 
3024     case OCL_COMMAND_QUEUE:
3025 	// FIXME: find context object, pass as parent
3026 	return ecl_lookup_object(env,&command_queue_r,*(void**)ptr,0);
3027 
3028     case OCL_IMAGE_FORMAT: {
3029 	cl_image_format* fmt = (cl_image_format*) ptr;
3030 	ERL_NIF_TERM channel_order;
3031 	ERL_NIF_TERM channel_type;
3032 	channel_order = make_enum(env,fmt->image_channel_order,
3033 				  kv_channel_order);
3034 	channel_type = make_enum(env,fmt->image_channel_data_type,
3035 				 kv_channel_type);
3036 	return enif_make_tuple2(env, channel_order, channel_type);
3037     }
3038 #if CL_VERSION_1_2 == 1
3039     case OCL_DEVICE_PARTITION: { // cl_device_partition_property
3040 	cl_device_partition_property* prop = (cl_device_partition_property*)ptr;
3041 	ERL_NIF_TERM term = ATOM(undefined);
3042 
3043 	switch(*prop++) {
3044 	case CL_DEVICE_PARTITION_EQUALLY:
3045 	    term = enif_make_uint(env, *prop);
3046 	    return enif_make_tuple2(env, ATOM(equally), term);
3047 	case CL_DEVICE_PARTITION_BY_COUNTS: {
3048 	    cl_device_partition_property* pp = prop;
3049 	    term = enif_make_list(env, 0);
3050 	    while(*pp != CL_DEVICE_PARTITION_BY_COUNTS_LIST_END)
3051 		pp++;
3052 	    if (pp > prop) {  // build list backwards
3053 		pp--;
3054 		while(pp >= prop) {
3055 		    ERL_NIF_TERM ui = enif_make_uint(env, *pp);
3056 		    term = enif_make_list_cell(env, ui, term);
3057 		    pp--;
3058 		}
3059 	    }
3060 	    return enif_make_tuple2(env, ATOM(by_counts), term);
3061 	}
3062 	case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN:
3063 	    switch(*prop) {
3064 	    case CL_DEVICE_AFFINITY_DOMAIN_NUMA:
3065 		term = ATOM(numa); break;
3066 	    case CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE:
3067 		term = ATOM(l4_cache); break;
3068 	    case CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE:
3069 		term = ATOM(l3_cache); break;
3070 	    case CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE:
3071 		term = ATOM(l2_cache); break;
3072 	    case CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE:
3073 		term = ATOM(l1_cache); break;
3074 	    case CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE:
3075 		term = ATOM(next_partitionable); break;
3076 	    default: return ATOM(undefined);
3077 	    }
3078 	    return enif_make_tuple2(env, ATOM(by_affinity_domain), term);
3079 
3080 	default:
3081 	    return ATOM(undefined);
3082 	}
3083 	break;
3084     }
3085 #endif
3086     case OCL_NUM_TYPES:
3087     default:
3088 	return ATOM(undefined);
3089     }
3090 }
3091 
3092 
make_info_value(ErlNifEnv * env,ecl_info_t * iptr,void * buf,size_t buflen)3093 static ERL_NIF_TERM make_info_value(ErlNifEnv* env, ecl_info_t* iptr, void* buf, size_t buflen)
3094 {
3095     char* dptr = (char*) buf;
3096     ERL_NIF_TERM value;
3097 
3098     if (iptr->is_array) {  // arrays are return as lists of items
3099 	ERL_NIF_TERM list = enif_make_list(env, 0);
3100 	size_t elem_size = ecl_sizeof(iptr->info_type);
3101 	size_t n = (buflen / elem_size);
3102 	dptr += (n*elem_size);  // run backwards!!!
3103 	while (buflen >= elem_size) {
3104 	    dptr -= elem_size;
3105 	    value = make_info_element(env, iptr->info_type, dptr, iptr->extern_info);
3106 	    list = enif_make_list_cell(env, value, list);
3107 	    buflen -= elem_size;
3108 	}
3109 	value = list;
3110     }
3111     else {
3112 	value = make_info_element(env, iptr->info_type, dptr, iptr->extern_info);
3113     }
3114     return value;
3115 }
3116 
3117 // Find object value
3118 // return {ok,Value} | {error,Reason} | exception badarg
3119 //
make_object_info(ErlNifEnv * env,ERL_NIF_TERM key,ecl_object_t * obj,info_fn_t * func,ecl_info_t * info,size_t num_info)3120 ERL_NIF_TERM make_object_info(ErlNifEnv* env,  ERL_NIF_TERM key, ecl_object_t* obj, info_fn_t* func,
3121 			      ecl_info_t* info, size_t num_info)
3122 {
3123     size_t returned_size = 0;
3124     size_t size = MAX_INFO_SIZE;
3125     unsigned char buf[MAX_INFO_SIZE];
3126     void* ptr = buf;
3127     ERL_NIF_TERM res;
3128     cl_int err;
3129     unsigned int i;
3130 
3131     if (!enif_is_atom(env, key))
3132 	return enif_make_badarg(env);
3133     i = 0;
3134     while((i < num_info) && (*info[i].info_key != key))
3135 	i++;
3136     if (i == num_info)
3137 	return enif_make_badarg(env);  // or error ?
3138 
3139     err = (*func)(obj->opaque,info[i].info_id,size,ptr,&returned_size);
3140     if (err == CL_INVALID_VALUE) {
3141         // try again allocate returned_size, returned_size does not
3142 	// (yet) return the actual needed bytes (by spec)
3143 	// but it looks like it... ;-)
3144 	size = returned_size;
3145 	if (!(ptr = enif_alloc(size)))
3146 	    return ecl_make_error(env, CL_OUT_OF_HOST_MEMORY);
3147 	err = (*func)(obj->opaque,info[i].info_id,size,ptr,&returned_size);
3148     }
3149 
3150     if (!err) {
3151 	res = enif_make_tuple2(env, ATOM(ok),
3152 			       make_info_value(env,&info[i],ptr,returned_size));
3153     }
3154     else
3155 	res = ecl_make_error(env, err);
3156     if (ptr != buf)
3157 	enif_free(ptr);
3158     return res;
3159 }
3160 
3161 
make_object_info2(ErlNifEnv * env,ERL_NIF_TERM key,ecl_object_t * obj1,void * obj2,info2_fn_t * func,ecl_info_t * info,size_t num_info)3162 ERL_NIF_TERM make_object_info2(ErlNifEnv* env,  ERL_NIF_TERM key, ecl_object_t* obj1, void* obj2,
3163 				   info2_fn_t* func, ecl_info_t* info, size_t num_info)
3164 {
3165     size_t returned_size = 0;
3166     cl_long *buf;
3167     cl_int err;
3168     unsigned int i;
3169     ERL_NIF_TERM result;
3170 
3171     if (!enif_is_atom(env, key))
3172 	return enif_make_badarg(env);
3173     i = 0;
3174     while((i < num_info) && (*info[i].info_key != key))
3175 	i++;
3176     if (i == num_info)
3177 	return enif_make_badarg(env);  // or error ?
3178     returned_size = info[i].def_size;
3179     if (returned_size > 0 ||
3180         !(err = (*func)(obj1->opaque, obj2, info[i].info_id,
3181 			0, NULL, &returned_size))) {
3182 	if (!(buf = enif_alloc(returned_size)))
3183 	    return ecl_make_error(env, CL_OUT_OF_RESOURCES);
3184 	if (!(err = (*func)(obj1->opaque, obj2, info[i].info_id,
3185 			    returned_size, buf, &returned_size))) {
3186 	    result = enif_make_tuple2(env, ATOM(ok), make_info_value(env, &info[i], buf, returned_size));
3187 	    enif_free(buf);
3188 	    return result;
3189 	}
3190     }
3191     return ecl_make_error(env, err);
3192 }
3193 
3194 /******************************************************************************
3195  *
3196  * main ecl event loop run as a thread.
3197  *  The main purpose is to dispatch and send messages to owners
3198  *
3199  *****************************************************************************/
3200 
ecl_context_main(void * arg)3201 static void* ecl_context_main(void* arg)
3202 {
3203     ecl_thread_t* self = arg;
3204     ecl_context_t* ctx = self->arg;
3205 
3206     ctx->upgrade_count++;     // signal that we have started/upgraded
3207 
3208     DBG("ecl_context_main: started (addr=%p,tid=%p,count=%d)",
3209 	&self, self->tid, ctx->upgrade_count);
3210 
3211     while(1) {
3212 	ecl_message_t m;
3213 	ecl_message_recv(self, &m);
3214 
3215 	switch(m.type) {
3216 	case ECL_MESSAGE_UPGRADE:
3217 	    DBG("ecl_context_main: %p got upgrade func=%p",
3218 		self, m.upgrade);
3219 	    // upgrade must never return and SHOULD be tail recursive!
3220 	    return (m.upgrade)(arg);
3221 
3222 	case ECL_MESSAGE_SYNC:
3223 	    DBG("ecl_context_main: %p got sync", self);
3224 	    m.type = ECL_MESSAGE_SYNC_ACK;
3225 	    ecl_queue_put(&ctx->obj.env->q, &m);
3226 	    break;
3227 
3228 	case ECL_MESSAGE_SYNC_ACK:
3229 	    // Should not end up here
3230 	    DBG("ecl_context_main: sync ack received");
3231 	    break;
3232 
3233 	case ECL_MESSAGE_STOP: {
3234 	    DBG("ecl_context_main: stopped by command");
3235 	    if (m.env) {
3236 		enif_send(0, &m.sender, m.env,
3237 			  enif_make_tuple3(m.env,
3238 					   ATOM(cl_async), m.ref,
3239 					   ATOM(ok)));
3240 		enif_free_env(m.env);
3241 	    }
3242 	    ecl_thread_exit(self);
3243 	    break;
3244 	}
3245 
3246 	case ECL_MESSAGE_FLUSH: {  // flush message queue
3247 	    cl_int err;
3248 
3249 	    DBG("ecl_context_main: flush q=%lu", (unsigned long) m.queue);
3250 	    err = ECL_CALL(clFlush)(m.queue->queue);
3251 	    // send {cl_async, Ref, ok | {error,Reason}}
3252 	    if (m.env) {
3253 		ERL_NIF_TERM reply;
3254 		int res;
3255 		UNUSED(res);
3256 		reply = !err ? ATOM(ok) : ecl_make_error(m.env, err);
3257 		res = enif_send(0, &m.sender, m.env,
3258 				enif_make_tuple3(m.env,
3259 						 ATOM(cl_async),
3260 						 m.ref,
3261 						 reply));
3262 		DBG("ecl_context_main: send r=%d", res);
3263 		enif_free_env(m.env);
3264 	    }
3265 	    enif_release_resource(m.queue);
3266 	    break;
3267 	}
3268 
3269 	case ECL_MESSAGE_FINISH: {  // finish message queue
3270 	    cl_int err;
3271 	    DBG("ecl_context_main: finish q=%lu", (unsigned long) m.queue);
3272 	    err = ECL_CALL(clFlush)(m.queue->queue);
3273 	    // send {cl_async, Ref, ok | {error,Reason}}
3274 	    if (m.env) {
3275 		int res;
3276 		ERL_NIF_TERM reply;
3277 		UNUSED(res);
3278 		reply = !err ? ATOM(ok) : ecl_make_error(m.env, err);
3279 		res = enif_send(0, &m.sender, m.env,
3280 				enif_make_tuple3(m.env,
3281 						 ATOM(cl_async), m.ref,
3282 						 reply));
3283 		DBG("ecl_context_main: send r=%d", res);
3284 		enif_free_env(m.env);
3285 	    }
3286 	    enif_release_resource(m.queue);
3287 	    break;
3288 	}
3289 
3290 	case ECL_MESSAGE_WAIT_FOR_EVENT: { // wait for one event
3291 	    cl_int err;
3292 	    cl_event list[1];
3293 	    DBG("ecl_context_main: wait_for_event e=%lu",
3294 		(unsigned long) m.event);
3295 	    list[0] = m.event->obj.event;
3296 	    err = ECL_CALL(clWaitForEvents)(1, list);
3297 	    DBG("ecl_context_main: wait_for_event err=%d", err);
3298 	    // reply to caller pid !
3299 	    if (m.env) {
3300 		ERL_NIF_TERM reply;
3301 		int res;
3302 
3303 		UNUSED(res);
3304 		if (!err) {
3305 		    cl_int status;
3306 		    // read status COMPLETE | ERROR
3307 		    // FIXME: check error
3308 		    clGetEventInfo(m.event->obj.event,
3309 				   CL_EVENT_COMMAND_EXECUTION_STATUS,
3310 				   sizeof(status), &status, 0);
3311 		    switch(status) {
3312 		    case CL_COMPLETE:
3313 			DBG("ecl_context_main: wait_for_event complete");
3314 			if (m.event->bin && m.event->rd) {
3315 			    m.event->rl = true;
3316 			    reply = enif_make_binary(m.env, m.event->bin);
3317 			}
3318 			else
3319 			    reply = ATOM(complete);
3320 			break;
3321 		    default:
3322 		      DBG("ecl_context_main: wait_for_event: status=%d",
3323 			  status);
3324 			// must/should be an error
3325 			reply = ecl_make_error(m.env, status);
3326 			break;
3327 		    }
3328 		}
3329 		else
3330 		    reply = ecl_make_error(m.env, err);
3331 		res = enif_send(0, &m.sender, m.env,
3332 				enif_make_tuple3(m.env,
3333 						 ATOM(cl_event), m.ref,
3334 						 reply));
3335 		DBG("ecl_context_main: send r=%d", res);
3336 		enif_free_env(m.env);
3337 	    }
3338 	    enif_release_resource(m.event);
3339 	    break;
3340 	}
3341 	default:
3342 	    break;
3343 	}
3344     }
3345     return 0;
3346 }
3347 
3348 
3349 //
3350 // API functions
3351 //
3352 
3353 // noop - no operation for NIF interface performance benchmarking
ecl_noop(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3354 static ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc,
3355 			     const ERL_NIF_TERM argv[])
3356 {
3357     UNUSED(env);
3358     UNUSED(argc);
3359     UNUSED(argv);
3360     return ATOM(ok);
3361 }
3362 
3363 // version - return list of API versions supported
ecl_versions(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3364 static ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc,
3365 				 const ERL_NIF_TERM argv[])
3366 {
3367     ERL_NIF_TERM list = enif_make_list(env, 0);
3368     ERL_NIF_TERM vsn;
3369     UNUSED(env);
3370     UNUSED(argc);
3371     UNUSED(argv);
3372 
3373 #if CL_VERSION_1_0 == 1
3374     vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 0));
3375     list = enif_make_list_cell(env, vsn, list);
3376 #endif
3377 #if CL_VERSION_1_1 == 1
3378     vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 1));
3379     list = enif_make_list_cell(env, vsn, list);
3380 #endif
3381 #if CL_VERSION_1_2 == 1
3382     vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 2));
3383     list = enif_make_list_cell(env, vsn, list);
3384 #endif
3385     return list;
3386 }
3387 
ecl_get_platform_ids(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3388 static ERL_NIF_TERM ecl_get_platform_ids(ErlNifEnv* env, int argc,
3389 					 const ERL_NIF_TERM argv[])
3390 {
3391     cl_uint          num_platforms;
3392     cl_platform_id   platform_id[MAX_PLATFORMS];
3393     ERL_NIF_TERM     idv[MAX_PLATFORMS];
3394     ERL_NIF_TERM     platform_list;
3395     cl_uint i;
3396     cl_int err;
3397     UNUSED(argc);
3398     UNUSED(argv);
3399 
3400     if ((err = ECL_CALL(clGetPlatformIDs)(MAX_PLATFORMS, platform_id, &num_platforms)))
3401 	return ecl_make_error(env, err);
3402 
3403     for (i = 0; i < num_platforms; i++)
3404 	idv[i] = ecl_lookup_object(env,&platform_r,platform_id[i],0);
3405 
3406     platform_list = enif_make_list_from_array(env, idv,num_platforms);
3407     return enif_make_tuple2(env, ATOM(ok), platform_list);
3408 }
3409 
ecl_get_platform_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3410 static ERL_NIF_TERM ecl_get_platform_info(ErlNifEnv* env, int argc,
3411 					  const ERL_NIF_TERM argv[])
3412 {
3413     ecl_object_t* o_platform;
3414     UNUSED(argc);
3415 
3416     if (!get_ecl_object(env, argv[0], &platform_r, false, &o_platform))
3417 	return enif_make_badarg(env);
3418     return make_object_info(env, argv[1], o_platform,
3419 			    (info_fn_t*) ECL_FUNC_PTR(clGetPlatformInfo),
3420 			    platform_info,
3421 			    sizeof_array(platform_info));
3422 }
3423 
3424 
ecl_get_device_ids(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3425 static ERL_NIF_TERM ecl_get_device_ids(ErlNifEnv* env, int argc,
3426 				       const ERL_NIF_TERM argv[])
3427 {
3428     cl_device_type   device_type = 0;
3429     cl_device_id     device_id[MAX_DEVICES];
3430     ERL_NIF_TERM     idv[MAX_DEVICES];
3431     ERL_NIF_TERM     device_list;
3432     cl_uint          num_devices;
3433     cl_uint          i;
3434     cl_platform_id   platform;
3435     cl_int err;
3436     UNUSED(argc);
3437 
3438     if (!get_object(env, argv[0], &platform_r, true,(void**)&platform))
3439 	return enif_make_badarg(env);
3440     if (!get_bitfields(env, argv[1], &device_type, kv_device_type))
3441 	return enif_make_badarg(env);
3442     if ((err = ECL_CALL(clGetDeviceIDs)(platform, device_type, MAX_DEVICES,
3443 					device_id, &num_devices)))
3444 	return ecl_make_error(env, err);
3445 
3446     for (i = 0; i < num_devices; i++)
3447 	idv[i] = ecl_lookup_object(env, &device_r, device_id[i], 0);
3448     device_list = enif_make_list_from_array(env, idv, num_devices);
3449     return enif_make_tuple2(env, ATOM(ok), device_list);
3450 }
3451 
3452 #if CL_VERSION_1_2 == 1
3453 
3454 // parse:
3455 //    {equally,<unsigned int>} |
3456 //    {by_counts, [<unsigned_int>]}
3457 //    {by_affinity_domain, num|l4_cache|l3_cache|l2_cache|l1_cache|
3458 //                         next_partiionable}
3459 //
get_partition_properties(ErlNifEnv * env,const ERL_NIF_TERM term,cl_device_partition_property * rvec,size_t * rlen)3460 static int get_partition_properties(ErlNifEnv* env, const ERL_NIF_TERM term,
3461 				    cl_device_partition_property* rvec,
3462 				    size_t* rlen)
3463 {
3464     const ERL_NIF_TERM* elem;
3465     int arity;
3466     size_t maxlen = *rlen;
3467     size_t n = 0;
3468 
3469     if (!enif_get_tuple(env, term, &arity, &elem))
3470 	return 0;
3471     if (arity != 2)
3472 	return 0;
3473     if (!enif_is_atom(env, elem[0]))
3474 	return 0;
3475 
3476     if (elem[0] == ATOM(equally)) {
3477 	unsigned long v;
3478 	*rvec++ = CL_DEVICE_PARTITION_EQUALLY;
3479 	if (!enif_get_ulong(env, elem[1], &v))
3480 	    return 0;
3481 	*rvec++ = v;
3482 	n=2;
3483     }
3484     else if (elem[0] == ATOM(by_counts)) {
3485 	ERL_NIF_TERM head, tail;
3486 	ERL_NIF_TERM list = elem[1];
3487 	unsigned long v;
3488 	*rvec++ = CL_DEVICE_PARTITION_BY_COUNTS;
3489 	n++;
3490 	while((n < maxlen-1) &&
3491 	      enif_get_list_cell(env, list, &head, &tail)) {
3492 	    if (!enif_get_ulong(env, head, &v))
3493 		return 0;
3494 	    *rvec++=v;
3495 	    n++;
3496 	    list = tail;
3497 	}
3498 	if (!enif_is_empty_list(env, list))
3499 	    return 0;
3500 	*rvec++ = CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;
3501 	n++;
3502     }
3503     else if (elem[0] == ATOM(by_affinity_domain)) {
3504 	*rvec++ = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
3505 	if (elem[1] == ATOM(numa))
3506 	    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_NUMA;
3507 	else if (elem[1] == ATOM(l4_cache))
3508 	    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE;
3509 	else if (elem[1] == ATOM(l3_cache))
3510 	    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE;
3511 	else if (elem[1] == ATOM(l2_cache))
3512 	    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE;
3513 	else if (elem[1] == ATOM(l1_cache))
3514 	    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE;
3515 	else if (elem[1] == ATOM(next_partitionable))
3516 	    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;
3517 	else
3518 	    return 0;
3519 	n = 2;
3520     }
3521     else
3522 	return 0;
3523     *rlen = n;
3524     return 1;
3525 }
3526 
ecl_create_sub_devices(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3527 static ERL_NIF_TERM ecl_create_sub_devices(ErlNifEnv* env, int argc,
3528 					   const ERL_NIF_TERM argv[])
3529 {
3530     ecl_object_t*    d;
3531     cl_device_id     out_devices[MAX_DEVICES];
3532     ERL_NIF_TERM     idv[MAX_DEVICES];
3533     ERL_NIF_TERM     device_list;
3534     cl_uint          num_devices;
3535     cl_uint          i;
3536     cl_device_partition_property properties[128];
3537     size_t num_property =  128-1;
3538     cl_int err;
3539     UNUSED(argc);
3540 
3541     // fixme calc length of properties !
3542     if (!get_ecl_object(env, argv[0], &device_r, false, &d))
3543 	return enif_make_badarg(env);
3544     if (!get_partition_properties(env, argv[1], properties, &num_property))
3545 	return enif_make_badarg(env);
3546     properties[num_property] = 0;
3547 
3548     err = ECL_CALL(clCreateSubDevices)(d->device, properties, MAX_DEVICES,
3549 				       out_devices, &num_devices);
3550     if (err)
3551 	return ecl_make_error(env, err);
3552     for (i = 0; i < num_devices; i++) {
3553 	ecl_object_t* obj;
3554 	if ((obj = ecl_lookup(env, out_devices[i])) == NULL)
3555 	    obj = ecl_new(env, &device_r, out_devices[i], 0, d->version);
3556 	idv[i] = make_object(env, device_r.type, obj);
3557     }
3558     device_list = enif_make_list_from_array(env, idv, num_devices);
3559     return enif_make_tuple2(env, ATOM(ok), device_list);
3560 }
3561 #endif
3562 
3563 
ecl_get_device_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3564 static ERL_NIF_TERM ecl_get_device_info(ErlNifEnv* env, int argc,
3565 					const ERL_NIF_TERM argv[])
3566 {
3567     ecl_object_t* o_device;
3568     UNUSED(argc);
3569 
3570     if (!get_ecl_object(env, argv[0], &device_r, false, &o_device))
3571 	return enif_make_badarg(env);
3572     return make_object_info(env, argv[1], o_device,
3573 			    (info_fn_t*) clGetDeviceInfo,
3574 			    device_info,
3575 			    sizeof_array(device_info));
3576 }
3577 
3578 typedef struct {
3579     ErlNifPid        sender;  // sender pid
3580     ErlNifEnv*        s_env;  // senders message environment (ref, bin's etc)
3581     ErlNifEnv*        r_env;  // receiver message environment (ref, bin's etc)
3582     ErlNifTid           tid;  // Calling thread
3583 } ecl_notify_data_t;
3584 
ecl_context_notify(const char * errinfo,const void * private_info,size_t cb,void * user_data)3585 void CL_CALLBACK ecl_context_notify(const char *errinfo,
3586 				    const void* private_info, size_t cb,
3587 				    void * user_data)
3588 {
3589     /* ecl_notify_data_t* bp = user_data; */
3590     /* ERL_NIF_TERM reply; */
3591     /* ErlNifEnv*   s_env; */
3592     /* int res; */
3593     UNUSED(errinfo);
3594     UNUSED(private_info);
3595     UNUSED(cb);
3596     UNUSED(user_data);
3597 
3598     DBG("ecl_context_notify:  user_data=%p", user_data);
3599     DBG("ecl_context_notify:  priv_info=%p cb=%d", private_info, cb);
3600     CL_ERROR("CL ERROR ASYNC: %s", errinfo);
3601 }
3602 
3603 //
3604 // cl:create_context([cl_device_id()]) ->
3605 //   {ok, cl_context()} | {error, cl_error()}
3606 //
ecl_create_context(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3607 static ERL_NIF_TERM ecl_create_context(ErlNifEnv* env, int argc,
3608 				       const ERL_NIF_TERM argv[])
3609 {
3610     cl_device_id     device_list[MAX_DEVICES];
3611     cl_uint          num_devices = MAX_DEVICES;
3612     cl_context       context;
3613     cl_int err;
3614     ecl_notify_data_t* bp;
3615 
3616     UNUSED(argc);
3617 
3618     if (!get_object_list(env, argv[0], &device_r, false,
3619 			 (void**) device_list, &num_devices))
3620 	return enif_make_badarg(env);
3621 
3622     if (!(bp = enif_alloc(sizeof(ecl_notify_data_t))))
3623 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
3624 
3625     if (!(bp->r_env = enif_alloc_env())) {
3626 	enif_free(bp);
3627 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
3628     }
3629     (void) enif_self(env, &bp->sender);
3630     bp->s_env = env;
3631     bp->tid = enif_thread_self();
3632     DBG("ecl_create_context: self %p", bp->tid);
3633 
3634     context = ECL_CALL(clCreateContext)(0, num_devices, device_list,
3635 					ecl_context_notify,
3636 					bp,
3637 					&err);
3638     if (context) {
3639 	cl_uint i;
3640 	ERL_NIF_TERM t;
3641 	ecl_object_t *dev;
3642 	cl_int version = 100;
3643 	for(i = 0; i < num_devices; i++) {
3644 	    dev = ecl_lookup(env, device_list[i]);
3645 	    /* Should hopefully be the same for all devices ?
3646 	       use the least version */
3647 	    if(dev->version < version)
3648 		version = dev->version;
3649 	}
3650 	t = ecl_make_context(env, context, version);
3651 	return enif_make_tuple2(env, ATOM(ok), t);
3652     }
3653     return ecl_make_error(env, err);
3654 }
3655 
ecl_get_context_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3656 static ERL_NIF_TERM ecl_get_context_info(ErlNifEnv* env, int argc,
3657 					 const ERL_NIF_TERM argv[])
3658 {
3659     ecl_object_t* o_context;
3660     UNUSED(argc);
3661 
3662     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
3663 	return enif_make_badarg(env);
3664     return make_object_info(env, argv[1], o_context,
3665 			    (info_fn_t*) ECL_FUNC_PTR(clGetContextInfo),
3666 			    context_info,
3667 			    sizeof_array(context_info));
3668 }
3669 
ecl_create_queue(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3670 static ERL_NIF_TERM ecl_create_queue(ErlNifEnv* env, int argc,
3671 				     const ERL_NIF_TERM argv[])
3672 {
3673     ecl_object_t* o_context;
3674     cl_device_id  device;
3675     cl_command_queue_properties properties;
3676     cl_command_queue queue;
3677     cl_int err;
3678     UNUSED(argc);
3679 
3680     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
3681 	return enif_make_badarg(env);
3682     if (!get_object(env, argv[1], &device_r, false, (void**) &device))
3683 	return enif_make_badarg(env);
3684     if (!get_bitfields(env, argv[2], &properties,
3685 		       kv_command_queue_properties))
3686 	return enif_make_badarg(env);
3687     queue = ECL_CALL(clCreateCommandQueue)(o_context->context, device, properties,
3688 				 &err);
3689     if (queue) {
3690 	ERL_NIF_TERM t;
3691 	t = ecl_make_object(env, &command_queue_r,(void*) queue, o_context);
3692 	return enif_make_tuple2(env, ATOM(ok), t);
3693     }
3694     return ecl_make_error(env, err);
3695 }
3696 
ecl_get_queue_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3697 static ERL_NIF_TERM ecl_get_queue_info(ErlNifEnv* env, int argc,
3698 				       const ERL_NIF_TERM argv[])
3699 {
3700     ecl_object_t* o_queue;
3701     UNUSED(argc);
3702 
3703     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
3704 	return enif_make_badarg(env);
3705     return make_object_info(env, argv[1], o_queue,
3706 			    (info_fn_t*) ECL_FUNC_PTR(clGetCommandQueueInfo),
3707 			    queue_info,
3708 			    sizeof_array(queue_info));
3709 }
3710 
3711 
ecl_create_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3712 static ERL_NIF_TERM ecl_create_buffer(ErlNifEnv* env, int argc,
3713 				      const ERL_NIF_TERM argv[])
3714 {
3715     ecl_object_t* o_context;
3716     size_t size;
3717     cl_mem_flags mem_flags;
3718     cl_mem mem;
3719     ErlNifBinary bin;
3720     void* host_ptr = 0;
3721     cl_int err;
3722     UNUSED(argc);
3723 
3724 
3725     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
3726 	return enif_make_badarg(env);
3727     if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
3728 	return enif_make_badarg(env);
3729     if (!ecl_get_sizet(env, argv[2], &size))
3730 	return enif_make_badarg(env);
3731     if (!enif_inspect_iolist_as_binary(env, argv[3], &bin))
3732 	return enif_make_badarg(env);
3733     // How do we keep binary data (CL_MEM_USE_HOST_PTR)
3734     // We should probably make sure that the buffer is read_only in this
3735     // case!
3736     // we must be able to reference count the binary object!
3737     // USE enif_make_copy !!!! this copy is done to the thread environment!
3738     if (bin.size > 0) {
3739 	host_ptr = bin.data;
3740 	mem_flags |= CL_MEM_COPY_HOST_PTR;
3741 	if (size < bin.size)
3742 	    size = bin.size;
3743     }
3744     else if (size)
3745 	mem_flags |= CL_MEM_ALLOC_HOST_PTR;
3746 
3747     mem = ECL_CALL(clCreateBuffer)(o_context->context, mem_flags, size,
3748 				   host_ptr, &err);
3749 
3750     if (!err) {
3751 	ERL_NIF_TERM t;
3752 	t = ecl_make_object(env, &mem_r,(void*) mem, o_context);
3753 	return enif_make_tuple2(env, ATOM(ok), t);
3754     }
3755     return ecl_make_error(env, err);
3756 }
3757 
3758 #if CL_VERSION_1_1 == 1
ecl_create_sub_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3759 static ERL_NIF_TERM ecl_create_sub_buffer(ErlNifEnv* env, int argc,
3760 					  const ERL_NIF_TERM argv[])
3761 {
3762     ecl_object_t* o_buf;
3763     cl_mem_flags mem_flags;
3764     cl_mem mem;
3765     ERL_NIF_TERM info;
3766     ERL_NIF_TERM info_arg1, info_arg2;
3767     cl_buffer_region reg;
3768     cl_int err;
3769     UNUSED(argc);
3770 
3771     if (!get_ecl_object(env, argv[0], &mem_r, false, &o_buf))
3772 	return enif_make_badarg(env);
3773     if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
3774 	return enif_make_badarg(env);
3775     if (!enif_is_atom(env, argv[2]) || (argv[2] != ATOM(region)))
3776 	return enif_make_badarg(env);
3777     info = argv[3];
3778     if (!enif_is_list(env, info))
3779 	return enif_make_badarg(env);
3780     enif_get_list_cell(env, info, &info_arg1, &info);
3781     if (!enif_is_list(env, info))
3782 	return enif_make_badarg(env);
3783     enif_get_list_cell(env, info, &info_arg2, &info);
3784     if (!enif_is_empty_list(env, info))
3785 	return enif_make_badarg(env);
3786     if (!ecl_get_sizet(env, info_arg1, &reg.origin))
3787 	return enif_make_badarg(env);
3788     if (!ecl_get_sizet(env, info_arg2, &reg.size))
3789 	return enif_make_badarg(env);
3790 
3791     mem = ECL_CALL(clCreateSubBuffer)(o_buf->mem, mem_flags,
3792 				      CL_BUFFER_CREATE_TYPE_REGION,
3793 				      &reg, &err);
3794     if (!err) {
3795 	ERL_NIF_TERM t;
3796 	t = ecl_make_object(env, &mem_r,(void*) mem, o_buf);
3797 	return enif_make_tuple2(env, ATOM(ok), t);
3798     }
3799     return ecl_make_error(env, err);
3800 }
3801 #endif
3802 //
3803 // format {channel_order, channel_data_type} (old) |
3804 // {'cl_image_format', order, data_type }
3805 //
get_image_format(ErlNifEnv * env,ERL_NIF_TERM arg,cl_image_format * format)3806 static int get_image_format(ErlNifEnv* env, ERL_NIF_TERM arg,
3807 				cl_image_format* format)
3808 {
3809     const ERL_NIF_TERM* rec;
3810     int i, arity;
3811 
3812     if (!enif_get_tuple(env, arg, &arity, &rec))
3813 	return 0;
3814     if (arity == 2)
3815 	i = 0;
3816     else if (arity == 3) {
3817 	i = 1;
3818 	if (!enif_is_atom(env, rec[0]) || (rec[0] != ATOM(cl_image_format)))
3819 	    return 0;
3820     }
3821     else
3822 	return 0;
3823 
3824     if (!get_enum(env, rec[i], &format->image_channel_order,
3825 		  kv_channel_order))
3826 	return 0;
3827     if (!get_enum(env, rec[i+1], &format->image_channel_data_type,
3828 		  kv_channel_type))
3829 	return 0;
3830     return 1;
3831 }
3832 
3833 //
3834 // format {'cl_image_desc',image_type,image_width,image_height,image_depth,
3835 //             image_array_size,image_row_pitch,image_slice_pitch,
3836 //             num_mip_levels,num_samples,buffer}
3837 //
get_image_desc(ErlNifEnv * env,ERL_NIF_TERM arg,cl_image_desc * desc)3838 static int get_image_desc(ErlNifEnv* env, ERL_NIF_TERM arg,
3839 			  cl_image_desc* desc)
3840 {
3841     const ERL_NIF_TERM* rec;
3842     int arity;
3843 
3844     if (!enif_get_tuple(env, arg, &arity, &rec) || (arity != 11))
3845 	return 0;
3846 
3847     if (!enif_is_atom(env, rec[0]) || (rec[0] != ATOM(cl_image_desc)))
3848 	return 0;
3849 
3850     if (!get_enum(env, rec[1], &desc->image_type, kv_mem_object_type))
3851 	return 0;
3852     if (!ecl_get_sizet(env, rec[2], &desc->image_width))
3853 	return 0;
3854     if (!ecl_get_sizet(env, rec[3], &desc->image_height))
3855 	return 0;
3856     if (!ecl_get_sizet(env, rec[4], &desc->image_depth))
3857 	return 0;
3858     if (!ecl_get_sizet(env, rec[5], &desc->image_array_size))
3859 	return 0;
3860     if (!ecl_get_sizet(env, rec[6], &desc->image_row_pitch))
3861 	return 0;
3862     if (!ecl_get_sizet(env, rec[7], &desc->image_slice_pitch))
3863 	return 0;
3864     desc->num_mip_levels = 0;  // rec[8] according to spec
3865     desc->num_samples = 0;     // rec[9] according to spec
3866     if (!get_object(env, rec[10], &mem_r, true, (void**)&desc->buffer))
3867 	return 0;
3868     return 1;
3869 }
3870 
3871 // 1.0, 1,1 wrapper where clCreateImage
e_clCreateImage(cl_context context,cl_mem_flags flags,const cl_image_format image_format,const cl_image_desc * image_desc,void * host_ptr,cl_int * errcode_ret)3872 cl_mem CL_CALLBACK e_clCreateImage(cl_context context,
3873 		       cl_mem_flags flags,
3874 		       const cl_image_format image_format,
3875 		       const cl_image_desc* image_desc,
3876 		       void* host_ptr,
3877 		       cl_int* errcode_ret)
3878 {
3879     UNUSED(context);
3880     UNUSED(flags);
3881     UNUSED(image_format);
3882     UNUSED(image_desc);
3883     UNUSED(host_ptr);
3884 
3885     *errcode_ret = CL_INVALID_OPERATION;
3886     return NULL;
3887 }
3888 
3889 // 1.2 -> 1.0 wrapper: clCreateImage2D using clCreateImage
eclCreateImage2D(cl_context context,cl_mem_flags mem_flags,const cl_image_format * format,size_t width,size_t height,size_t row_pitch,void * host_ptr,cl_int * err)3890 cl_mem CL_CALLBACK eclCreateImage2D(cl_context context,
3891 			 cl_mem_flags mem_flags,
3892 			 const cl_image_format * format,
3893 			 size_t width,
3894 			 size_t height,
3895 			 size_t row_pitch,
3896 			 void * host_ptr,
3897 			 cl_int *err)
3898 {
3899     cl_image_desc desc;
3900 
3901     desc.image_type = CL_MEM_OBJECT_IMAGE2D;
3902     desc.image_width = width;
3903     desc.image_height = height;
3904     desc.image_depth = 1;       // used with IMAGE3D
3905     desc.image_array_size = 1;  // used with IMAGE2D/3D_ARRAY?
3906     desc.image_row_pitch = row_pitch;
3907     desc.image_slice_pitch = 0;  // maybe 0 for 2D image
3908     desc.num_mip_levels = 0;  // must be 0
3909     desc.num_samples= 0;      // must be 0
3910     desc.buffer = NULL;       // used when CL_MEM_OBJECT_IMAGE1D_BUFFER
3911 
3912     return ECL_CALL(clCreateImage)(context,
3913 				   mem_flags,
3914 				   format,
3915 				   &desc,
3916 				   host_ptr,
3917 				   err);
3918 }
3919 
ecl_create_image2d(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3920 static ERL_NIF_TERM ecl_create_image2d(ErlNifEnv* env, int argc,
3921 					const ERL_NIF_TERM argv[])
3922 {
3923     ecl_object_t* o_context;
3924     size_t width;
3925     size_t height;
3926     size_t row_pitch;
3927     cl_image_format format;
3928     cl_mem_flags mem_flags;
3929     cl_mem mem;
3930     ErlNifBinary bin;
3931     void* host_ptr = 0;
3932     cl_int err;
3933     UNUSED(argc);
3934 
3935     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
3936 	return enif_make_badarg(env);
3937     if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
3938 	return enif_make_badarg(env);
3939 
3940     if (!get_image_format(env, argv[2], &format))
3941 	return enif_make_badarg(env);
3942 
3943     if (!ecl_get_sizet(env, argv[3], &width))
3944 	return enif_make_badarg(env);
3945     if (!ecl_get_sizet(env, argv[4], &height))
3946 	return enif_make_badarg(env);
3947     if (!ecl_get_sizet(env, argv[5], &row_pitch))
3948 	return enif_make_badarg(env);
3949 
3950     if (!enif_inspect_iolist_as_binary(env, argv[6], &bin))
3951 	return enif_make_badarg(env);
3952     // How do we keep binary data (CL_MEM_USE_HOST_PTR) (read_only)
3953     // we must be able to reference count the binary object!
3954     if (bin.size > 0) {
3955 	host_ptr = bin.data;
3956 	mem_flags |= CL_MEM_COPY_HOST_PTR;
3957     }
3958     else if (width && height)
3959 	mem_flags |= CL_MEM_ALLOC_HOST_PTR;
3960     mem = ECL_CALL(clCreateImage2D)(o_context->context, mem_flags, &format,
3961 				    width, height, row_pitch,
3962 				    host_ptr, &err);
3963     if (!err) {
3964 	ERL_NIF_TERM t;
3965 	t = ecl_make_object(env, &mem_r,(void*) mem, o_context);
3966 	return enif_make_tuple2(env, ATOM(ok), t);
3967     }
3968     return ecl_make_error(env, err);
3969 }
3970 
3971 // 1.2 -> 1.1 wrapper: clCreateImage3D using clCreateImage
eclCreateImage3D(cl_context context,cl_mem_flags mem_flags,const cl_image_format * format,size_t width,size_t height,size_t depth,size_t row_pitch,size_t slice_pitch,void * host_ptr,cl_int * err)3972 cl_mem CL_CALLBACK eclCreateImage3D(cl_context context,
3973 			cl_mem_flags mem_flags,
3974 			const cl_image_format* format,
3975 			size_t width,
3976 			size_t height,
3977 			size_t depth,
3978 			size_t row_pitch,
3979 			size_t slice_pitch,
3980 			void * host_ptr,
3981 			cl_int *err)
3982 {
3983     cl_image_desc desc;
3984 
3985     desc.image_type = CL_MEM_OBJECT_IMAGE3D;
3986     desc.image_width = width;
3987     desc.image_height = height;
3988     desc.image_depth = depth;       // used with IMAGE3D
3989     desc.image_array_size = 1;  // used with IMAGE2D/3D_ARRAY?
3990     desc.image_row_pitch = row_pitch;
3991     desc.image_slice_pitch = slice_pitch;  // maybe 0 for 2D image
3992     desc.num_mip_levels = 0;  // must be 0
3993     desc.num_samples= 0;      // must be 0
3994     desc.buffer = NULL;       // used when CL_MEM_OBJECT_IMAGE1D_BUFFER
3995 
3996     return ECL_CALL(clCreateImage)(context,
3997 				   mem_flags,
3998 				   format,
3999 				   &desc,
4000 				   host_ptr,
4001 				   err);
4002 }
4003 
4004 
ecl_create_image3d(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4005 static ERL_NIF_TERM ecl_create_image3d(ErlNifEnv* env, int argc,
4006 				       const ERL_NIF_TERM argv[])
4007 {
4008     ecl_object_t* o_context;
4009     size_t width;
4010     size_t height;
4011     size_t depth;
4012     size_t row_pitch;
4013     size_t slice_pitch;
4014     cl_image_format format;
4015     cl_mem_flags mem_flags;
4016     cl_mem mem;
4017     ErlNifBinary bin;
4018     void* host_ptr = 0;
4019     cl_int err;
4020     UNUSED(argc);
4021 
4022     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4023 	return enif_make_badarg(env);
4024     if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
4025 	return enif_make_badarg(env);
4026 
4027     if (!get_image_format(env, argv[2], &format))
4028 	return enif_make_badarg(env);
4029 
4030     if (!ecl_get_sizet(env, argv[3], &width))
4031 	return enif_make_badarg(env);
4032     if (!ecl_get_sizet(env, argv[4], &height))
4033 	return enif_make_badarg(env);
4034     if (!ecl_get_sizet(env, argv[5], &depth))
4035 	return enif_make_badarg(env);
4036     if (!ecl_get_sizet(env, argv[6], &row_pitch))
4037 	return enif_make_badarg(env);
4038     if (!ecl_get_sizet(env, argv[7], &slice_pitch))
4039 	return enif_make_badarg(env);
4040 
4041     if (!enif_inspect_iolist_as_binary(env, argv[8], &bin))
4042 	return enif_make_badarg(env);
4043     // How do we keep binary data (CL_MEM_USE_HOST_PTR)  (read_only)
4044     // we must be able to reference count the binary object!
4045     if (bin.size > 0) {
4046 	host_ptr = bin.data;
4047 	mem_flags |= CL_MEM_COPY_HOST_PTR;
4048     }
4049     else if (width && height && depth)
4050 	mem_flags |= CL_MEM_ALLOC_HOST_PTR;
4051     mem = ECL_CALL(clCreateImage3D)(o_context->context, mem_flags, &format,
4052 				    width, height, depth, row_pitch,
4053 				    slice_pitch,
4054 				    host_ptr, &err);
4055     if (mem) {
4056 	ERL_NIF_TERM t;
4057 	t = ecl_make_object(env, &mem_r,(void*) mem, o_context);
4058 	return enif_make_tuple2(env, ATOM(ok), t);
4059     }
4060     return ecl_make_error(env, err);
4061 }
4062 
4063 //
4064 // cl:create_image(Context, MemFlags, ImageFormat, ImageDesc, Data) ->
4065 //
4066 //
4067 #if CL_VERSION_1_2 == 1
ecl_create_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4068 static ERL_NIF_TERM ecl_create_image(ErlNifEnv* env, int argc,
4069 				     const ERL_NIF_TERM argv[])
4070 {
4071     ecl_object_t* o_context;
4072     cl_image_format format;
4073     cl_image_desc   desc;
4074     cl_mem_flags mem_flags;
4075     cl_mem mem;
4076     ErlNifBinary bin;
4077     void* host_ptr = 0;
4078     cl_int err;
4079     UNUSED(argc);
4080 
4081     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4082 	return enif_make_badarg(env);
4083     if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
4084 	return enif_make_badarg(env);
4085 
4086     if (!get_image_format(env, argv[2], &format))
4087 	return enif_make_badarg(env);
4088 
4089     if (!get_image_desc(env, argv[3], &desc))
4090 	return enif_make_badarg(env);
4091 
4092     if (!enif_inspect_iolist_as_binary(env, argv[4], &bin))
4093 	return enif_make_badarg(env);
4094 
4095     if (bin.size > 0) {
4096 	host_ptr = bin.data;
4097 	mem_flags |= CL_MEM_COPY_HOST_PTR;
4098     }
4099     else if (desc.image_width && desc.image_height && desc.image_depth)
4100 	mem_flags |= CL_MEM_ALLOC_HOST_PTR;
4101 
4102     mem = ECL_CALL(clCreateImage)(o_context->context,
4103 				  mem_flags,
4104 				  &format,
4105 				  &desc,
4106 				  host_ptr,
4107 				  &err);
4108     if (mem) {
4109 	ERL_NIF_TERM t;
4110 	t = ecl_make_object(env, &mem_r,(void*) mem, o_context);
4111 	return enif_make_tuple2(env, ATOM(ok), t);
4112     }
4113     return ecl_make_error(env, err);
4114 }
4115 #endif
4116 
ecl_get_supported_image_formats(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4117 static ERL_NIF_TERM ecl_get_supported_image_formats(ErlNifEnv* env, int argc,
4118 						    const ERL_NIF_TERM argv[])
4119 {
4120     cl_context context;
4121     cl_mem_flags flags;
4122     cl_mem_object_type image_type;
4123     cl_image_format image_format[MAX_IMAGE_FORMATS];
4124     cl_uint num_image_formats;
4125     cl_int err;
4126     UNUSED(argc);
4127 
4128     if (!get_object(env, argv[0], &context_r, false, (void**) &context))
4129 	return enif_make_badarg(env);
4130     if (!get_bitfields(env, argv[1], &flags, kv_mem_flags))
4131 	return enif_make_badarg(env);
4132     if (!get_enum(env, argv[2], &image_type, kv_mem_object_type))
4133 	return enif_make_badarg(env);
4134     err = ECL_CALL(clGetSupportedImageFormats)(context, flags, image_type,
4135 					       MAX_IMAGE_FORMATS,
4136 					       image_format,
4137 					       &num_image_formats);
4138     if (!err) {
4139 	int i = (int) num_image_formats;
4140 	ERL_NIF_TERM list = enif_make_list(env, 0);
4141 
4142 	while(i) {
4143 	    ERL_NIF_TERM channel_order, channel_type;
4144 	    ERL_NIF_TERM elem;
4145 	    i--;
4146 	    channel_order = make_enum(env,
4147 				      image_format[i].image_channel_order,
4148 				      kv_channel_order);
4149 	    channel_type = make_enum(env,
4150 				     image_format[i].image_channel_data_type,
4151 				     kv_channel_type);
4152 	    elem = enif_make_tuple2(env, channel_order, channel_type);
4153 	    list = enif_make_list_cell(env, elem, list);
4154 	}
4155 	return enif_make_tuple2(env, ATOM(ok), list);
4156     }
4157     return ecl_make_error(env, err);
4158 }
4159 
4160 
ecl_get_mem_object_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4161 static ERL_NIF_TERM ecl_get_mem_object_info(ErlNifEnv* env, int argc,
4162 					    const ERL_NIF_TERM argv[])
4163 {
4164     ecl_object_t* o_mem;
4165     UNUSED(argc);
4166 
4167     if (!get_ecl_object(env, argv[0], &mem_r, false, &o_mem))
4168 	return enif_make_badarg(env);
4169     return make_object_info(env, argv[1], o_mem,
4170 			    (info_fn_t*) ECL_FUNC_PTR(clGetMemObjectInfo),
4171 			    mem_info,
4172 			    sizeof_array(mem_info));
4173 }
4174 
ecl_get_image_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4175 static ERL_NIF_TERM ecl_get_image_info(ErlNifEnv* env, int argc,
4176 				       const ERL_NIF_TERM argv[])
4177 {
4178     ecl_object_t* o_mem;
4179     UNUSED(argc);
4180 
4181     if (!get_ecl_object(env, argv[0], &mem_r, false, &o_mem))
4182 	return enif_make_badarg(env);
4183     return make_object_info(env, argv[1], o_mem,
4184 			    (info_fn_t*) ECL_FUNC_PTR(clGetImageInfo),
4185 			    image_info,
4186 			    sizeof_array(image_info));
4187 }
4188 
4189 //
4190 // cl:create_sampler(Context::cl_context(),Normalized::boolean(),
4191 //		     AddressingMode::cl_addressing_mode(),
4192 //		     FilterMode::cl_filter_mode()) ->
4193 //    {'ok', cl_sampler()} | {'error', cl_error()}.
4194 //
4195 
ecl_create_sampler(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4196 static ERL_NIF_TERM ecl_create_sampler(ErlNifEnv* env, int argc,
4197 				       const ERL_NIF_TERM argv[])
4198 {
4199     ecl_object_t* o_context;
4200     cl_bool normalized_coords;
4201     cl_addressing_mode addressing_mode;
4202     cl_filter_mode filter_mode;
4203     cl_sampler sampler;
4204     cl_int err;
4205     UNUSED(argc);
4206 
4207     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4208 	return enif_make_badarg(env);
4209     if (!get_bool(env, argv[1], &normalized_coords))
4210 	return enif_make_badarg(env);
4211     if (!get_enum(env, argv[2], &addressing_mode, kv_addressing_mode))
4212 	return enif_make_badarg(env);
4213     if (!get_enum(env, argv[3], &filter_mode, kv_filter_mode))
4214 	return enif_make_badarg(env);
4215 
4216     sampler = ECL_CALL(clCreateSampler)(o_context->context,
4217 					normalized_coords, addressing_mode, filter_mode,
4218 					&err);
4219     if (!err) {
4220 	ERL_NIF_TERM t;
4221 	t = ecl_make_object(env, &sampler_r,(void*) sampler, o_context);
4222 	return enif_make_tuple2(env, ATOM(ok), t);
4223     }
4224     return ecl_make_error(env, err);
4225 }
4226 
4227 
ecl_get_sampler_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4228 static ERL_NIF_TERM ecl_get_sampler_info(ErlNifEnv* env, int argc,
4229 					 const ERL_NIF_TERM argv[])
4230 {
4231     ecl_object_t* o_sampler;
4232     UNUSED(argc);
4233 
4234     if (!get_ecl_object(env, argv[0], &sampler_r, false, &o_sampler))
4235 	return enif_make_badarg(env);
4236     return make_object_info(env, argv[1], o_sampler,
4237 			    (info_fn_t*) ECL_FUNC_PTR(clGetSamplerInfo),
4238 			    sampler_info,
4239 			    sizeof_array(sampler_info));
4240 }
4241 
4242 //
4243 // cl:create_program_with_source(Context::cl_context(), Source::iodata()) ->
4244 //   {'ok', cl_program()} | {'error', cl_error()}
4245 //
ecl_create_program_with_source(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4246 static ERL_NIF_TERM ecl_create_program_with_source(ErlNifEnv* env, int argc,
4247 						   const ERL_NIF_TERM argv[])
4248 {
4249     ecl_object_t* o_context;
4250     cl_program program;
4251     ErlNifBinary source;
4252     char* strings[1];
4253     size_t lengths[1];
4254     cl_int err;
4255     UNUSED(argc);
4256 
4257     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4258 	return enif_make_badarg(env);
4259     if (!enif_inspect_iolist_as_binary(env, argv[1], &source))
4260 	return enif_make_badarg(env);
4261     strings[0] = (char*) source.data;
4262     lengths[0] = source.size;
4263     program = ECL_CALL(clCreateProgramWithSource)(o_context->context,
4264 						  1,
4265 						  (const char**) strings,
4266 						  lengths,
4267 						  &err);
4268     if (!err) {
4269 	ERL_NIF_TERM t;
4270 	t = ecl_make_object(env, &program_r,(void*) program, o_context);
4271 	return enif_make_tuple2(env, ATOM(ok), t);
4272     }
4273     return ecl_make_error(env, err);
4274 }
4275 
4276 //
4277 //  cl:create_program_with_binary(Context::cl_context(),
4278 //                                  DeviceList::[cl_device_id()],
4279 //                                  BinaryList::[binary()]) ->
4280 //    {'ok', cl_program()} | {'error', cl_error()}
4281 //
ecl_create_program_with_binary(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4282 static ERL_NIF_TERM ecl_create_program_with_binary(ErlNifEnv* env, int argc,
4283 						   const ERL_NIF_TERM argv[])
4284 {
4285     ecl_object_t* o_context;
4286     cl_program     program;
4287     cl_device_id   device_list[MAX_DEVICES];
4288     cl_uint        num_devices = MAX_DEVICES;
4289     ErlNifBinary   binary_list[MAX_DEVICES];
4290     size_t         num_binaries = MAX_DEVICES;
4291     size_t         lengths[MAX_DEVICES];
4292     unsigned char* data[MAX_DEVICES];
4293     cl_uint        i;
4294     cl_int         status[MAX_DEVICES];
4295     cl_int         err;
4296     UNUSED(argc);
4297 
4298     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4299 	return enif_make_badarg(env);
4300     if (!get_object_list(env, argv[1], &device_r, false,
4301 			 (void**) device_list, &num_devices))
4302 	return enif_make_badarg(env);
4303     if (!get_binary_list(env, argv[2], binary_list, &num_binaries))
4304 	return enif_make_badarg(env);
4305     if (num_binaries != num_devices)
4306 	return enif_make_badarg(env);
4307 
4308     for (i = 0; i < num_devices; i++) {
4309 	lengths[i] = binary_list[i].size;
4310 	data[i]    = binary_list[i].data;
4311     }
4312     program = ECL_CALL(clCreateProgramWithBinary)(o_context->context,
4313 						  num_devices,
4314 						  (const cl_device_id*) device_list,
4315 						  (const size_t*) lengths,
4316 						  (const unsigned char**) data,
4317 						  status,
4318 						  &err);
4319     if (!err) {
4320 	ERL_NIF_TERM t;
4321 	t = ecl_make_object(env, &program_r,(void*) program, o_context);
4322 	return enif_make_tuple2(env, ATOM(ok), t);
4323     }
4324     // FIXME: handle the value in the status array
4325     // In cases of error we can then detect which binary was corrupt...
4326     return ecl_make_error(env, err);
4327 }
4328 
4329 
4330 //
4331 //  cl:create_program_with_builtin_kernels(Context::cl_context(),
4332 //                                  DeviceList::[cl_device_id()],
4333 //                                  KernelNames::string()) ->
4334 //    {'ok', cl_program()} | {'error', cl_error()}
4335 //
4336 #if CL_VERSION_1_2 == 1
4337 
ecl_create_program_with_builtin_kernels(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4338 static ERL_NIF_TERM ecl_create_program_with_builtin_kernels(
4339     ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
4340 {
4341     ecl_object_t* o_context;
4342     cl_program     program;
4343     cl_device_id   device_list[MAX_DEVICES];
4344     cl_uint        num_devices = MAX_DEVICES;
4345     char kernel_names[MAX_KERNEL_NAME];
4346     cl_int         err;
4347     UNUSED(argc);
4348 
4349     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4350 	return enif_make_badarg(env);
4351     if (!get_object_list(env, argv[1], &device_r, false,
4352 			 (void**) device_list, &num_devices))
4353 	return enif_make_badarg(env);
4354     if (!enif_get_string(env, argv[2], kernel_names, sizeof(kernel_names),
4355 			 ERL_NIF_LATIN1))
4356 	return enif_make_badarg(env);
4357 
4358     program = ECL_CALL(clCreateProgramWithBuiltInKernels)(
4359 	o_context->context,
4360 	num_devices,
4361 	(const cl_device_id*) device_list,
4362 	kernel_names,
4363 	&err);
4364     if (!err) {
4365 	ERL_NIF_TERM t;
4366 	t = ecl_make_object(env, &program_r,(void*) program, o_context);
4367 	return enif_make_tuple2(env, ATOM(ok), t);
4368     }
4369     return ecl_make_error(env, err);
4370 }
4371 #endif
4372 
4373 //
4374 // @spec async_build_program(Program::cl_program(),
4375 //                     DeviceList::[cl_device_id()],
4376 //                     Options::string()) ->
4377 //  {'ok',Ref} | {'error', cl_error()}
4378 //
4379 //
4380 // Notification functio for clBuildProgram
4381 // Passed to main thread by sending a async response
4382 // FIXME: lock needed?
4383 //
4384 typedef struct {
4385     ErlNifPid        sender;  // sender pid
4386     ErlNifEnv*        s_env;  // senders message environment (ref, bin's etc)
4387     ErlNifEnv*        r_env;  // receiver message environment (ref, bin's etc)
4388     ErlNifTid           tid;  // Calling thread
4389     ERL_NIF_TERM        ref;  // ref (in env!)
4390     ecl_object_t*  program;
4391 } ecl_build_data_t;
4392 
ecl_build_notify(cl_program program,void * user_data)4393 void CL_CALLBACK ecl_build_notify(cl_program program, void* user_data)
4394 {
4395     ecl_build_data_t* bp = user_data;
4396     ERL_NIF_TERM reply;
4397     ErlNifEnv*        s_env;
4398     int res;
4399     UNUSED(program);
4400     UNUSED(res);
4401 
4402     DBG("ecl_build_notify: done program=%p, user_data=%p",
4403 	program, user_data);
4404 
4405     // FIXME: check all devices for build_status!
4406     // clGetProgramBuildInfo(bp->program->program, CL_PROGRAM_BUILD_STATUS,
4407 
4408     // reply = !err ? ATOM(ok) : ecl_make_error(bp->env, err);
4409 
4410     if(enif_equal_tids(bp->tid, enif_thread_self()))
4411        s_env = bp->s_env;
4412     else
4413        s_env = 0;
4414 
4415     reply = ATOM(ok);
4416     res = enif_send(s_env, &bp->sender, bp->r_env,
4417 		    enif_make_tuple3(bp->r_env,
4418 				     ATOM(cl_async),
4419 				     bp->ref,
4420 				     reply));
4421     DBG("ecl_build_notify: send r=%d", res);
4422     enif_free_env(bp->r_env);
4423     if (bp->program)
4424 	enif_release_resource(bp->program);
4425     enif_free(bp);
4426 }
4427 
4428 
ecl_async_build_program(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4429 static ERL_NIF_TERM ecl_async_build_program(ErlNifEnv* env, int argc,
4430 					    const ERL_NIF_TERM argv[])
4431 {
4432     ecl_object_t*    o_program;
4433     cl_device_id     device_list[MAX_DEVICES];
4434     cl_uint          num_devices = MAX_DEVICES;
4435     char             options[MAX_OPTION_LIST];
4436     ERL_NIF_TERM     ref;
4437     ecl_build_data_t* bp;
4438     cl_int           err;
4439     UNUSED(argc);
4440 
4441     if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4442 	return enif_make_badarg(env);
4443     if (!get_object_list(env, argv[1], &device_r, false,
4444 			 (void**) device_list, &num_devices))
4445 	return enif_make_badarg(env);
4446     if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1))
4447 	return enif_make_badarg(env);
4448     if (!(bp = enif_alloc(sizeof(ecl_build_data_t))))
4449 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
4450 
4451     if (!(bp->r_env = enif_alloc_env())) {
4452 	enif_free(bp);
4453 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
4454     }
4455     ref = enif_make_ref(env);
4456     (void) enif_self(env, &bp->sender);
4457     bp->ref    = enif_make_copy(bp->r_env, ref);
4458     bp->program = o_program;
4459     bp->s_env = env;
4460     bp->tid = enif_thread_self();
4461     enif_keep_resource(o_program);    // keep while operation is running
4462 
4463     err = ECL_CALL(clBuildProgram)(o_program->program,
4464 				   num_devices,
4465 				   device_list,
4466 				   (const char*) options,
4467 				   ecl_build_notify,
4468 				   bp);
4469     DBG("ecl_async_build_program: err=%d user_data=%p", err, bp);
4470 
4471     if ((err==CL_SUCCESS) ||
4472 	// This should not be returned, it is not according to spec!!!!
4473 	(err==CL_BUILD_PROGRAM_FAILURE))
4474 	return enif_make_tuple2(env, ATOM(ok), ref);
4475     else {
4476         enif_free_env(bp->r_env);
4477 	enif_release_resource(bp->program);
4478 	enif_free(bp);
4479 	return ecl_make_error(env, err);
4480     }
4481 }
4482 
4483 #if CL_VERSION_1_2 == 1
ecl_unload_platform_compiler(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4484 static ERL_NIF_TERM ecl_unload_platform_compiler(ErlNifEnv* env, int argc,
4485 						 const ERL_NIF_TERM argv[])
4486 {
4487     cl_int err;
4488     cl_platform_id   platform;
4489     ecl_env_t* ecl = enif_priv_data(env);
4490     UNUSED(argc);
4491 
4492     if(ecl->icd_version < 12)
4493 	return ecl_make_error(env, CL_INVALID_OPERATION);
4494     if (!get_object(env, argv[0], &platform_r, true,(void**)&platform))
4495 	return enif_make_badarg(env);
4496     err = ECL_CALL(clUnloadPlatformCompiler)(platform);
4497     // err = eclUnloadPlatformCompiler(platform);
4498     if (err)
4499 	return ecl_make_error(env, err);
4500     return ATOM(ok);
4501 }
4502 #endif
4503 
4504 #if CL_VERSION_1_2 == 1
4505 // -spec compile_program(Program::cl_program(),
4506 //		      DeviceList::[cl_device_id()],
4507 //		      Options::string(),
4508 //		      Headers::[cl_program()],
4509 //		      Names::[string()]) ->
4510 //    'ok' | {'error', cl_error()}.
4511 
4512 #define MAX_HEADERS 128
4513 
ecl_async_compile_program(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4514 static ERL_NIF_TERM ecl_async_compile_program(ErlNifEnv* env, int argc,
4515 					      const ERL_NIF_TERM argv[])
4516 {
4517     ecl_object_t*    o_program;
4518     cl_device_id     device_list[MAX_DEVICES];
4519     cl_uint          num_devices = MAX_DEVICES;
4520     char             options[MAX_OPTION_LIST];
4521     cl_uint          num_input_headers = MAX_HEADERS;
4522     cl_program       input_headers[MAX_HEADERS];
4523     size_t           num_header_include_names = MAX_HEADERS;
4524     char*            header_include_names[MAX_HEADERS];
4525     ERL_NIF_TERM     ref;
4526     ecl_build_data_t* bp = NULL;
4527     cl_int           err;
4528     UNUSED(argc);
4529 
4530     if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4531 	return enif_make_badarg(env);
4532     if (!get_object_list(env, argv[1], &device_r, false,
4533 			 (void**) device_list, &num_devices))
4534 	return enif_make_badarg(env);
4535     if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1))
4536 	return enif_make_badarg(env);
4537     if (!get_object_list(env, argv[3], &program_r, false,
4538 			 (void**) input_headers, &num_input_headers))
4539 	return enif_make_badarg(env);
4540     num_header_include_names = num_input_headers;
4541     if (!get_string_list(env, argv[4], header_include_names,
4542 			 &num_header_include_names))
4543 	return enif_make_badarg(env);
4544 
4545     if (!(bp = enif_alloc(sizeof(ecl_build_data_t)))) {
4546 	err =  CL_OUT_OF_RESOURCES;
4547 	goto error;
4548     }
4549     if (!(bp->r_env = enif_alloc_env())) {
4550 	err =  CL_OUT_OF_RESOURCES;
4551 	goto error;
4552     }
4553 
4554     ref = enif_make_ref(env);
4555     (void) enif_self(env, &bp->sender);
4556     bp->ref    = enif_make_copy(bp->r_env, ref);
4557     bp->program = o_program;
4558     bp->s_env = env;
4559     bp->tid = enif_thread_self();
4560     enif_keep_resource(o_program);    // keep while operation is running
4561 
4562     DBG("ecl_async_compile_program: program: %p, num_input_headers: %d, bp=%p",
4563 	o_program->program, num_input_headers, bp);
4564 
4565     err = ECL_CALL(clCompileProgram)(o_program->program,
4566 				     num_devices,
4567 				     device_list,
4568 				     (const char*) options,
4569 				     num_input_headers,
4570 				     num_input_headers ? input_headers : NULL,
4571 				     num_input_headers ?
4572 				     (const char**)header_include_names : NULL,
4573 				     ecl_build_notify,
4574 				     bp);
4575     DBG("ecl_async_compile_program: err=%d user_data=%p", err, bp);
4576 
4577     if ((err==CL_SUCCESS) || (err==CL_BUILD_PROGRAM_FAILURE)) {
4578 	// check if we need to save this until complete!
4579 	free_string_list(header_include_names, num_header_include_names);
4580 	return enif_make_tuple2(env, ATOM(ok), ref);
4581     }
4582 
4583 error:
4584     free_string_list(header_include_names, num_header_include_names);
4585     if (bp) {
4586 	if (bp->program) enif_release_resource(bp->program);
4587 	if (bp->r_env) enif_free_env(bp->r_env);
4588 	enif_free(bp);
4589     }
4590     return ecl_make_error(env, err);
4591 }
4592 #endif
4593 
4594 #if CL_VERSION_1_2 == 1
4595 // -spec link_program(Context::cl_context(),
4596 //		   DeviceList::[cl_device_id()],
4597 //		   Options::string(),
4598 //		   Programs::[cl_program()]) ->
4599 //    {'ok',cl_program()} | {'error', cl_error()}.
4600 
4601 #define MAX_INPUT_PROGRAMS 128
4602 
ecl_async_link_program(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4603 static ERL_NIF_TERM ecl_async_link_program(ErlNifEnv* env, int argc,
4604 					   const ERL_NIF_TERM argv[])
4605 {
4606     ecl_object_t*    o_context;
4607     cl_program       program;
4608     cl_device_id     device_list[MAX_DEVICES];
4609     cl_uint          num_devices = MAX_DEVICES;
4610     char             options[MAX_OPTION_LIST];
4611     cl_uint          num_input_programs = MAX_INPUT_PROGRAMS;
4612     cl_program       input_programs[MAX_INPUT_PROGRAMS];
4613     ERL_NIF_TERM     ref;
4614     ERL_NIF_TERM     prog;
4615     ecl_build_data_t* bp;
4616     cl_int           err;
4617     UNUSED(argc);
4618 
4619     if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4620 	return enif_make_badarg(env);
4621     if (!get_object_list(env, argv[1], &device_r, false,
4622 			 (void**) device_list, &num_devices))
4623 	return enif_make_badarg(env);
4624     if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1))
4625 	return enif_make_badarg(env);
4626     if (!get_object_list(env, argv[3], &program_r, false,
4627 			 (void**) input_programs, &num_input_programs))
4628 	return enif_make_badarg(env);
4629 
4630     if (!(bp = enif_alloc(sizeof(ecl_build_data_t))))
4631 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);
4632     if (!(bp->r_env = enif_alloc_env())) {
4633 	enif_free(bp);
4634 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
4635     }
4636 
4637     ref = enif_make_ref(env);
4638     (void) enif_self(env, &bp->sender);
4639     bp->ref    = enif_make_copy(bp->r_env, ref);
4640     bp->program = NULL;
4641     bp->s_env = env;
4642     bp->tid = enif_thread_self();
4643 
4644     DBG("ecl_async_link_program: context: %p, num_input_programs %d, bp=%p",
4645 	o_context->context, num_input_programs, bp);
4646 
4647     // lock callback inorder avoid race?
4648     program = ECL_CALL(clLinkProgram)(o_context->context,
4649 				      num_devices,
4650 				      num_devices ? device_list : NULL,
4651 				      (const char*) options,
4652 				      num_input_programs,
4653 				      input_programs,
4654 			    ecl_build_notify,
4655 				      bp,
4656 				      &err);
4657     DBG("ecl_async_link_program: err=%d program %p, user_data=%p",
4658 	err, program, bp);
4659 
4660     if (program == NULL) {
4661 	enif_free_env(bp->r_env);
4662 	enif_free(bp);
4663 	return ecl_make_error(env, err);
4664     }
4665     prog = ecl_make_object(env, &program_r,(void*) program, o_context);
4666     return enif_make_tuple2(env, ATOM(ok),
4667 			    enif_make_tuple2(env, ref, prog));
4668 }
4669 
4670 #endif
4671 
ecl_unload_compiler(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4672 static ERL_NIF_TERM ecl_unload_compiler(ErlNifEnv* env, int argc,
4673 					const ERL_NIF_TERM argv[])
4674 {
4675     cl_int err;
4676     ecl_env_t* ecl = enif_priv_data(env);
4677 
4678     UNUSED(argc);
4679     UNUSED(argv);
4680 
4681     if (ecl->icd_version >= 12) {
4682 	ecl_env_t* ecl = enif_priv_data(env);
4683 	cl_platform_id platform;
4684 	if (ecl->nplatforms <= 0)
4685 	    return ecl_make_error(env, CL_INVALID_VALUE);
4686 	platform = (cl_platform_id) ecl->platform[0].o_platform->opaque;
4687 	err = ECL_CALL(clUnloadPlatformCompiler)(platform);
4688 	// err = eclUnloadPlatformCompiler(platform);
4689     } else {
4690 	err = ECL_CALL(clUnloadCompiler)();
4691     }
4692     if (err)
4693 	return ecl_make_error(env, err);
4694     return ATOM(ok);
4695 }
4696 
4697 // Special (workaround) for checking if program may have binaries
program_may_have_binaries(cl_program program)4698 static int program_may_have_binaries(cl_program program)
4699 {
4700     cl_int num_devices;
4701     size_t returned_size;
4702     cl_device_id devices[MAX_DEVICES];
4703     int i;
4704 
4705     if (ECL_CALL(clGetProgramInfo)
4706 	(program,
4707 	 CL_PROGRAM_NUM_DEVICES,
4708 	 sizeof(num_devices),
4709 	 &num_devices,
4710 	 &returned_size) != CL_SUCCESS)
4711 	return 0;
4712 
4713     if (ECL_CALL(clGetProgramInfo)(program, CL_PROGRAM_DEVICES,
4714 				   num_devices*sizeof(cl_device_id),
4715 				   devices, NULL) != CL_SUCCESS)
4716 	return 0;
4717 
4718     for (i = 0; i < num_devices; i++) {
4719 	cl_build_status build_status = CL_BUILD_NONE;
4720         if (ECL_CALL(clGetProgramBuildInfo)
4721 	    (program, devices[i], CL_PROGRAM_BUILD_STATUS,
4722 	     sizeof(build_status),
4723 	     &build_status, NULL) != CL_SUCCESS)
4724 	    return 0;
4725 	if (build_status != CL_BUILD_SUCCESS) return 0;
4726     }
4727     return 1;
4728 }
4729 
4730 // Special util to extract program binary_sizes
make_program_binary_sizes(ErlNifEnv * env,cl_program program)4731 static ERL_NIF_TERM make_program_binary_sizes(ErlNifEnv* env,
4732 					      cl_program program)
4733 {
4734     cl_int err;
4735     ERL_NIF_TERM list;
4736     size_t returned_size;
4737     cl_uint num_devices;
4738     size_t size[MAX_DEVICES];
4739     int i;
4740 
4741     memset(size, 0,     sizeof(size));
4742 
4743     if ((err = ECL_CALL(clGetProgramInfo)
4744 	 (program,
4745 	  CL_PROGRAM_NUM_DEVICES,
4746 	  sizeof(num_devices),
4747 	  &num_devices,
4748 	  &returned_size)))
4749 	return ecl_make_error(env, err);
4750 
4751     if (program_may_have_binaries(program)) {
4752 	if ((err = ECL_CALL(clGetProgramInfo)
4753 	     (program,
4754 	      CL_PROGRAM_BINARY_SIZES,
4755 	      num_devices*sizeof(size_t),
4756 	      &size[0],
4757 	      &returned_size)))
4758 	    return ecl_make_error(env, err);
4759     }
4760     list = enif_make_list(env, 0);
4761     for (i = num_devices-1; i >= 0; i--) {
4762 	ERL_NIF_TERM elem = ecl_make_sizet(env, size[i]);
4763 	list = enif_make_list_cell(env, elem, list);
4764     }
4765     return enif_make_tuple2(env, ATOM(ok), list);
4766 }
4767 
4768 
4769 // Special util to extract program binaries
make_program_binaries(ErlNifEnv * env,cl_program program)4770 static ERL_NIF_TERM make_program_binaries(ErlNifEnv* env, cl_program program)
4771 {
4772     cl_int err;
4773     ERL_NIF_TERM list;
4774     size_t returned_size;
4775     cl_uint num_devices;
4776     int i;
4777 
4778     if ((err = ECL_CALL(clGetProgramInfo)
4779 	 (program,
4780 	  CL_PROGRAM_NUM_DEVICES,
4781 	  sizeof(num_devices),
4782 	  &num_devices,
4783 	  &returned_size)))
4784 	return ecl_make_error(env, err);
4785 
4786     if (!program_may_have_binaries(program)) {
4787 	ErlNifBinary empty;
4788 	enif_alloc_binary(0, &empty);
4789 
4790 	list = enif_make_list(env, 0);
4791 	for (i = num_devices-1; i >= 0; i--) {
4792 	    ERL_NIF_TERM elem;
4793 	    elem = enif_make_binary(env, &empty);
4794 	    list = enif_make_list_cell(env, elem, list);
4795 	}
4796 	enif_release_binary(&empty);
4797 	return enif_make_tuple2(env, ATOM(ok), list);
4798     }
4799     else {
4800 	size_t size[MAX_DEVICES];
4801 	ErlNifBinary binary[MAX_DEVICES];
4802 	unsigned char* data[MAX_DEVICES];
4803 
4804 	memset(size, 0,     sizeof(size));
4805 	memset(binary, 0,   sizeof(binary));
4806 
4807 	if ((err = ECL_CALL(clGetProgramInfo)
4808 	     (program,
4809 	      CL_PROGRAM_BINARY_SIZES,
4810 	      num_devices*sizeof(size_t),
4811 	      &size[0],
4812 	      &returned_size)))
4813 	    return ecl_make_error(env, err);
4814 	i = 0;
4815 	while (i < (int) num_devices) {
4816 	    if (!enif_alloc_binary(size[i], &binary[i])) {
4817 		err = CL_OUT_OF_HOST_MEMORY;
4818 		goto cleanup;
4819 	    }
4820 	    data[i] = binary[i].data;
4821 	    i++;
4822 	}
4823 	if ((err = ECL_CALL(clGetProgramInfo)
4824 	     (program,
4825 	      CL_PROGRAM_BINARIES,
4826 	      sizeof(unsigned char*)*num_devices,
4827 	      data,
4828 	      &returned_size)))
4829 	    goto cleanup;
4830 
4831 	list = enif_make_list(env, 0);
4832 	for (i = num_devices-1; i >= 0; i--) {
4833 	    ERL_NIF_TERM elem = enif_make_binary(env, &binary[i]);
4834 	    list = enif_make_list_cell(env, elem, list);
4835 	}
4836 	return enif_make_tuple2(env, ATOM(ok), list);
4837 
4838     cleanup:
4839 	while(i > 0) {
4840 	    i--;
4841 	    enif_release_binary(&binary[i]);
4842 	}
4843 	return ecl_make_error(env, err);
4844     }
4845 }
4846 
ecl_get_program_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4847 static ERL_NIF_TERM ecl_get_program_info(ErlNifEnv* env, int argc,
4848 					 const ERL_NIF_TERM argv[])
4849 {
4850     ecl_object_t* o_program;
4851     UNUSED(argc);
4852 
4853     if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4854 	return enif_make_badarg(env);
4855 
4856     if (argv[1] == ATOM(binaries))
4857 	return make_program_binaries(env, o_program->program);
4858     else if (argv[1] == ATOM(binary_sizes))
4859 	return make_program_binary_sizes(env, o_program->program);
4860     else
4861 	return make_object_info(env, argv[1], o_program,
4862 				(info_fn_t*) ECL_FUNC_PTR(clGetProgramInfo),
4863 				program_info,
4864 				sizeof_array(program_info));
4865 }
4866 
ecl_get_program_build_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4867 static ERL_NIF_TERM ecl_get_program_build_info(ErlNifEnv* env, int argc,
4868 					       const ERL_NIF_TERM argv[])
4869 {
4870     ecl_object_t* o_program;
4871     ecl_object_t* o_device;
4872     UNUSED(argc);
4873 
4874     if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4875 	return enif_make_badarg(env);
4876     if (!get_ecl_object(env, argv[1], &device_r, false, &o_device))
4877 	return enif_make_badarg(env);
4878     return make_object_info2(env, argv[2], o_program, o_device->opaque,
4879 			     (info2_fn_t*) ECL_FUNC_PTR(clGetProgramBuildInfo),
4880 			     build_info,
4881 			     sizeof_array(build_info));
4882 }
4883 
4884 
ecl_create_kernel(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4885 static ERL_NIF_TERM ecl_create_kernel(ErlNifEnv* env, int argc,
4886 				      const ERL_NIF_TERM argv[])
4887 {
4888     ecl_object_t* o_program;
4889     cl_kernel kernel;
4890     char kernel_name[MAX_KERNEL_NAME];
4891     cl_int err;
4892     UNUSED(argc);
4893 
4894     if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4895 	return enif_make_badarg(env);
4896     if (!enif_get_string(env, argv[1], kernel_name, sizeof(kernel_name),
4897 			 ERL_NIF_LATIN1))
4898 	return enif_make_badarg(env);
4899 
4900     kernel = ECL_CALL(clCreateKernel)(o_program->program,kernel_name, &err);
4901     if (!err) {
4902 	ERL_NIF_TERM t;
4903 	t = ecl_make_kernel(env, kernel, o_program);
4904 	return enif_make_tuple2(env, ATOM(ok), t);
4905     }
4906     return ecl_make_error(env, err);
4907 }
4908 
4909 
4910 //
4911 // @spec create_kernels_in_program(Program::cl_program()) ->
4912 //    {'ok', [cl_kernel()]} | {'error', cl_error()}
4913 //
ecl_create_kernels_in_program(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4914 static ERL_NIF_TERM ecl_create_kernels_in_program(ErlNifEnv* env, int argc,
4915 						  const ERL_NIF_TERM argv[])
4916 {
4917     ecl_object_t* o_program;
4918     ERL_NIF_TERM kernv[MAX_KERNELS];
4919     ERL_NIF_TERM kernel_list;
4920     cl_kernel kernel[MAX_KERNELS];
4921     cl_uint num_kernels_ret;
4922     cl_uint i;
4923     cl_int err;
4924     UNUSED(argc);
4925 
4926     if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4927 	return enif_make_badarg(env);
4928 
4929     err = ECL_CALL(clCreateKernelsInProgram)
4930 	(o_program->program,
4931 	 MAX_KERNELS,
4932 	 kernel,
4933 	 &num_kernels_ret);
4934     if (err)
4935 	return ecl_make_error(env, err);
4936     for (i = 0; i < num_kernels_ret; i++) {
4937 	// FIXME: handle out of memory
4938 	kernv[i] = ecl_make_kernel(env, kernel[i], o_program);
4939     }
4940     kernel_list = enif_make_list_from_array(env, kernv, num_kernels_ret);
4941     return enif_make_tuple2(env, ATOM(ok), kernel_list);
4942 }
4943 
4944 
4945 //
4946 // cl:set_kernel_arg(Kernel::cl_kernel(), Index::non_neg_integer(),
4947 //                   Argument::cl_kernel_arg()) ->
4948 // {Type,Value}
4949 // {'size',Value}
4950 // {ecl_object,Handle,<<Res>>}   object (special for sampler)
4951 // integer()   ==  {'int', Value}
4952 // float()     ==  {'float', Value}
4953 // list        ==  Raw data
4954 // binary      ==  Raw data
4955 //
ecl_set_kernel_arg(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4956 static ERL_NIF_TERM ecl_set_kernel_arg(ErlNifEnv* env, int argc,
4957 				       const ERL_NIF_TERM argv[])
4958 {
4959     ecl_kernel_t* o_kernel;
4960     unsigned char arg_buf[16*sizeof(double)]; // vector type buffer
4961     cl_uint arg_index;
4962     size_t  arg_size;
4963     void*   arg_value;
4964     const ERL_NIF_TERM* array;
4965     double   fval;
4966     int      ival;
4967     long     lval;
4968     unsigned long luval;
4969     size_t   sval;
4970     ErlNifUInt64 u64val;
4971     ErlNifSInt64 i64val;
4972     ErlNifBinary bval;
4973     cl_int   int_arg;
4974     cl_float float_arg;
4975     void*    ptr_arg = 0;
4976     int      arity;
4977     cl_int   err;
4978     int      arg_type = KERNEL_ARG_OTHER;
4979     UNUSED(argc);
4980 
4981     if (!get_ecl_object(env,argv[0],&kernel_r,false,(ecl_object_t**)&o_kernel))
4982 	return enif_make_badarg(env);
4983     if (!enif_get_uint(env, argv[1], &arg_index))
4984 	return enif_make_badarg(env);
4985     if (enif_get_tuple(env, argv[2], &arity, &array)) {
4986 	if (arity == 3) {
4987 	    if (array[0] == ATOM(mem_t)) {
4988 		if (!get_object(env,argv[2],&mem_r,true,&ptr_arg))
4989 		    return enif_make_badarg(env);
4990 		arg_type = KERNEL_ARG_MEM;
4991 		arg_value = &ptr_arg;
4992 		arg_size = sizeof(cl_mem);
4993 		goto do_kernel_arg;
4994 	    }
4995 	    else if (array[0] == ATOM(sampler_t)) {
4996 		if (!get_object(env,argv[2],&sampler_r,false,&ptr_arg))
4997 		    return enif_make_badarg(env);
4998 		arg_type = KERNEL_ARG_SAMPLER;
4999 		arg_value = &ptr_arg;
5000 		arg_size = sizeof(cl_sampler);
5001 		goto do_kernel_arg;
5002 	    }
5003 	    return enif_make_badarg(env);
5004 	}
5005 	else if (arity == 2) {
5006 	    cl_uint typen;
5007 	    ocl_type_t base_type;
5008 	    size_t     base_size;
5009 	    int       vec_size;
5010 	    int value_arity;
5011 	    const ERL_NIF_TERM* values;
5012 	    unsigned char* ptr = arg_buf;
5013 	    int i;
5014 
5015 	    if (!get_enum(env, array[0], &typen, kv_cl_type))
5016 		return enif_make_badarg(env);
5017 	    vec_size = typen >> 16;
5018 	    base_type = typen & 0xFFFF;
5019 	    base_size = ecl_sizeof(base_type);
5020 	    if ((vec_size == 1) && !enif_is_tuple(env, array[1])) {
5021 		value_arity = 1;
5022 		values = &array[1];
5023 	    }
5024 	    else if (!enif_get_tuple(env, array[1], &value_arity, &values))
5025 		return enif_make_badarg(env);
5026 	    if (value_arity != vec_size)
5027 		return enif_make_badarg(env);
5028 	    for (i = 0; i < vec_size; i++) {
5029 		switch(base_type) {
5030 		case OCL_CHAR:
5031 		    if (!enif_get_long(env, values[i], &lval))
5032 			return enif_make_badarg(env);
5033 		    *((cl_char*)ptr) = (cl_char) lval;
5034 		    break;
5035 		case OCL_UCHAR:
5036 		    if (!enif_get_ulong(env, values[i], &luval))
5037 			return enif_make_badarg(env);
5038 		    *((cl_uchar*)ptr) = (cl_uchar) luval;
5039 		    break;
5040 		case OCL_SHORT:
5041 		    if (!enif_get_long(env, values[i], &lval))
5042 			return enif_make_badarg(env);
5043 		    *((cl_short*)ptr) = (cl_short) lval;
5044 		    break;
5045 		case OCL_USHORT:
5046 		    if (!enif_get_ulong(env, values[i], &luval))
5047 			return enif_make_badarg(env);
5048 		    *((cl_ushort*)ptr) = (cl_ushort) luval;
5049 		    break;
5050 		case OCL_INT:
5051 		    if (!enif_get_long(env, values[i], &lval))
5052 			return enif_make_badarg(env);
5053 		    *((cl_int*)ptr) = (cl_int) lval;
5054 		    break;
5055 		case OCL_UINT:
5056 		    if (!enif_get_ulong(env, values[i], &luval))
5057 			return enif_make_badarg(env);
5058 		    *((cl_uint*)ptr) = (cl_uint) luval;
5059 		    break;
5060 		case OCL_LONG:
5061 		    if (!enif_get_int64(env, values[i], &i64val))
5062 			return enif_make_badarg(env);
5063 		    *((cl_long*)ptr) = i64val;
5064 		    break;
5065 		case OCL_ULONG:
5066 		    if (!enif_get_uint64(env, values[i], &u64val))
5067 			return enif_make_badarg(env);
5068 		    *((cl_ulong*)ptr) = u64val;
5069 		    break;
5070 		case OCL_HALF:
5071 		    if (!enif_get_ulong(env, values[i], &luval))
5072 			return enif_make_badarg(env);
5073 		    *((cl_half*)ptr) = (cl_half) luval;
5074 		    break;
5075 		case OCL_FLOAT:
5076 		    if (!enif_get_double(env, values[i], &fval))
5077 			return enif_make_badarg(env);
5078 		    *((cl_float*)ptr) = (cl_float) fval;
5079 		    break;
5080 
5081 		case OCL_DOUBLE:
5082 		    if (!enif_get_double(env, values[i], &fval))
5083 			return enif_make_badarg(env);
5084 		    *((cl_double*)ptr) = fval;
5085 		    break;
5086 		case OCL_SIZE:
5087 		    if (!ecl_get_sizet(env, values[i], &sval))
5088 			return enif_make_badarg(env);
5089 		    *((size_t*)ptr) = sval;
5090 		    break;
5091 		case OCL_BOOL:
5092 		case OCL_STRING:
5093 		case OCL_ENUM:
5094 		case OCL_BITFIELD:
5095 		case OCL_POINTER:
5096 		case OCL_PLATFORM:
5097 		case OCL_DEVICE:
5098 		case OCL_CONTEXT:
5099 		case OCL_PROGRAM:
5100 		case OCL_COMMAND_QUEUE:
5101 		case OCL_IMAGE_FORMAT:
5102 		case OCL_DEVICE_PARTITION:
5103 		case OCL_NUM_TYPES:
5104 		default:
5105 		    return enif_make_badarg(env);
5106 		}
5107 		ptr += base_size;
5108 	    }
5109 	    arg_value = arg_buf;
5110 	    arg_size  = base_size*vec_size;
5111 	    goto do_kernel_arg;
5112 	}
5113 	return enif_make_badarg(env);
5114     }
5115     else if (enif_get_int(env, argv[2], &ival)) {
5116 	int_arg = ival;
5117 	arg_value = &int_arg;
5118 	arg_size = sizeof(int_arg);
5119 	goto do_kernel_arg;
5120     }
5121     else if (enif_get_double(env, argv[2], &fval)) {
5122 	float_arg = (float) fval;
5123 	arg_value = &float_arg;
5124 	arg_size = sizeof(float_arg);
5125 	goto do_kernel_arg;
5126     }
5127     else if (enif_inspect_iolist_as_binary(env, argv[2], &bval)) {
5128 	// rule your own case
5129 	arg_value = bval.data;
5130 	arg_size  = bval.size;
5131 	goto do_kernel_arg;
5132     }
5133     return enif_make_badarg(env);
5134 
5135 do_kernel_arg:
5136     err = ECL_CALL(clSetKernelArg)
5137 	(o_kernel->obj.kernel,
5138 	 arg_index,
5139 	 arg_size,
5140 	 arg_value);
5141     if (!err) {
5142 	set_kernel_arg(o_kernel, arg_index, arg_type, ptr_arg);
5143 	return ATOM(ok);
5144     }
5145     return ecl_make_error(env, err);
5146 }
5147 
5148 // cl:set_kernel_arg_size(Kernel::cl_kernel(), Index::non_neg_integer(),
5149 //                        Size::non_neg_integer()) ->
5150 //    'ok' | {'error', cl_error()}
5151 //
5152 // cl special to set kernel arg with size only (local mem etc)
5153 //
ecl_set_kernel_arg_size(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5154 static ERL_NIF_TERM ecl_set_kernel_arg_size(ErlNifEnv* env, int argc,
5155 					    const ERL_NIF_TERM argv[])
5156 {
5157     ecl_kernel_t* o_kernel;
5158     cl_uint arg_index;
5159     size_t  arg_size;
5160     unsigned char* arg_value = 0;
5161     cl_int  err;
5162     UNUSED(argc);
5163 
5164     if (!get_ecl_object(env,argv[0],&kernel_r,false,(ecl_object_t**)&o_kernel))
5165 	return enif_make_badarg(env);
5166     if (!enif_get_uint(env, argv[1], &arg_index))
5167 	return enif_make_badarg(env);
5168     if (!ecl_get_sizet(env, argv[2], &arg_size))
5169 	return enif_make_badarg(env);
5170 
5171     err = ECL_CALL(clSetKernelArg)
5172 	(o_kernel->obj.kernel,
5173 	 arg_index,
5174 	 arg_size,
5175 	 arg_value);
5176     if (!err) {
5177 	set_kernel_arg(o_kernel, arg_index, KERNEL_ARG_OTHER, (void*) 0);
5178 	return ATOM(ok);
5179     }
5180     return ecl_make_error(env, err);
5181 
5182 }
5183 
ecl_get_kernel_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5184 static ERL_NIF_TERM ecl_get_kernel_info(ErlNifEnv* env, int argc,
5185 					const ERL_NIF_TERM argv[])
5186 {
5187     ecl_object_t* o_kernel;
5188     UNUSED(argc);
5189 
5190     if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel))
5191 	return enif_make_badarg(env);
5192     return make_object_info(env, argv[1], o_kernel,
5193 			    (info_fn_t*) ECL_FUNC_PTR(clGetKernelInfo),
5194 			    kernel_info,
5195 			    sizeof_array(kernel_info));
5196 }
5197 
ecl_get_kernel_workgroup_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5198 static ERL_NIF_TERM ecl_get_kernel_workgroup_info(ErlNifEnv* env, int argc,
5199 						  const ERL_NIF_TERM argv[])
5200 {
5201     ecl_object_t* o_kernel;
5202     ecl_object_t* o_device;
5203     UNUSED(argc);
5204 
5205     if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel))
5206 	return enif_make_badarg(env);
5207     if (!get_ecl_object(env, argv[1], &device_r, false, &o_device))
5208 	return enif_make_badarg(env);
5209     return make_object_info2(env, argv[2], o_kernel, o_device->opaque,
5210 			     (info2_fn_t*) ECL_FUNC_PTR(clGetKernelWorkGroupInfo),
5211 			     workgroup_info,
5212 			     sizeof_array(workgroup_info));
5213 }
5214 
5215 #if CL_VERSION_1_2 == 1
ecl_get_kernel_arg_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5216 static ERL_NIF_TERM ecl_get_kernel_arg_info(ErlNifEnv* env, int argc,
5217 					    const ERL_NIF_TERM argv[])
5218 {
5219     ecl_object_t* o_kernel;
5220     cl_uint arg_index;
5221     UNUSED(argc);
5222 
5223     if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel))
5224 	return enif_make_badarg(env);
5225     if (!enif_get_uint(env, argv[1], &arg_index))
5226 	return enif_make_badarg(env);
5227     return make_object_info2(env, argv[2], o_kernel,
5228 			     (void*) (size_t) arg_index,
5229 			     (info2_fn_t*) ECL_FUNC_PTR(clGetKernelArgInfo),
5230 			     arg_info,
5231 			     sizeof_array(arg_info));
5232 }
5233 #endif
5234 
5235 //
5236 // cl:enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(),
5237 //                   WaitList::[cl_event()], WantEvent::boolean()) ->
5238 //    'ok' | {'ok', cl_event()} | {'error', cl_error()}
5239 //
ecl_enqueue_task(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5240 static ERL_NIF_TERM ecl_enqueue_task(ErlNifEnv* env, int argc,
5241 				     const ERL_NIF_TERM argv[])
5242 {
5243     ecl_object_t*    o_queue;
5244     cl_kernel        kernel;
5245     cl_event         wait_list[MAX_WAIT_LIST];
5246     cl_uint          num_events = MAX_WAIT_LIST;
5247     cl_event         event;
5248     cl_int           err;
5249     cl_bool          want_event;
5250     UNUSED(argc);
5251 
5252     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5253 	return enif_make_badarg(env);
5254     if (!get_object(env, argv[1], &kernel_r, false,(void**)&kernel))
5255 	return enif_make_badarg(env);
5256     if (!get_object_list(env, argv[2], &event_r, false,
5257 			 (void**) wait_list, &num_events))
5258 	return enif_make_badarg(env);
5259     if (!get_bool(env, argv[3], &want_event))
5260 	return enif_make_badarg(env);
5261 
5262     err = ECL_CALL(clEnqueueTask)
5263 	(o_queue->queue,
5264 	 kernel,
5265 	 num_events,
5266 	 num_events ? wait_list : NULL,
5267 	 want_event ? &event : NULL);
5268     if (!err) {
5269 	if (want_event) {
5270 	    ERL_NIF_TERM t;
5271 	    t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5272 	    return enif_make_tuple2(env, ATOM(ok), t);
5273 	}
5274 	return ATOM(ok);
5275     }
5276     return ecl_make_error(env, err);
5277 }
5278 //
5279 // cl:enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(),
5280 //                            Global::[non_neg_integer()],
5281 //                            Local::[non_neg_integer()],
5282 //                            WaitList::[cl_event()], WantEvent::boolean()) ->
5283 //    'ok' | {'ok', cl_event()} | {'error', cl_error()}
5284 //
ecl_enqueue_nd_range_kernel(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5285 static ERL_NIF_TERM ecl_enqueue_nd_range_kernel(ErlNifEnv* env, int argc,
5286 						const ERL_NIF_TERM argv[])
5287 {
5288     ecl_object_t*    o_queue;
5289     cl_kernel     kernel;
5290     cl_event      wait_list[MAX_WAIT_LIST];
5291     cl_uint       num_events = MAX_WAIT_LIST;
5292     size_t        global_work_size[MAX_WORK_SIZE];
5293     size_t        local_work_size[MAX_WORK_SIZE];
5294     size_t        work_dim = MAX_WORK_SIZE;
5295     size_t        temp_dim = MAX_WORK_SIZE;
5296     cl_event      event;
5297     cl_int        err;
5298     cl_bool       want_event;
5299     UNUSED(argc);
5300 
5301     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5302 	return enif_make_badarg(env);
5303     if (!get_object(env, argv[1], &kernel_r, false, (void**) &kernel))
5304 	return enif_make_badarg(env);
5305     if (!get_sizet_list(env, argv[2], global_work_size, &work_dim))
5306 	return enif_make_badarg(env);
5307     if (!get_sizet_list(env, argv[3], local_work_size, &temp_dim))
5308 	return enif_make_badarg(env);
5309     if (!get_object_list(env, argv[4], &event_r, false,
5310 			 (void**) wait_list, &num_events))
5311 	return enif_make_badarg(env);
5312     if (!get_bool(env, argv[5], &want_event))
5313 	return enif_make_badarg(env);
5314 
5315     if (work_dim == 0) {
5316 	return enif_make_badarg(env);
5317     }
5318 
5319     if ((temp_dim > 0) && (work_dim != temp_dim)) {
5320 	return enif_make_badarg(env);
5321     }
5322 
5323     err = ECL_CALL(clEnqueueNDRangeKernel)
5324 	(o_queue->queue, kernel,
5325 	 (cl_uint) work_dim,
5326 	 0, // global_work_offset,
5327 	 global_work_size,
5328 	 temp_dim ? local_work_size : NULL,
5329 	 num_events,
5330 	 num_events ? wait_list : NULL,
5331 	 want_event ? &event : NULL);
5332     if (!err) {
5333 	if (want_event) {
5334 	    ERL_NIF_TERM t;
5335 	    t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5336 	    return enif_make_tuple2(env, ATOM(ok), t);
5337 	}
5338 	return ATOM(ok);
5339     }
5340     return ecl_make_error(env, err);
5341 }
5342 
5343 // 1.2 -> 1.1 wrapper: clEnqueueMarkerWithWaitList implement clEnqueueMarker
eclEnqueueMarker(cl_command_queue queue,cl_event * event)5344 cl_int CL_CALLBACK eclEnqueueMarker(cl_command_queue queue,
5345 			cl_event * event)
5346 {
5347     return ECL_CALL(clEnqueueMarkerWithWaitList)(queue,0, NULL,event);
5348 }
5349 
ecl_enqueue_marker(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5350 static ERL_NIF_TERM ecl_enqueue_marker(ErlNifEnv* env, int argc,
5351 				       const ERL_NIF_TERM argv[])
5352 {
5353     ecl_object_t*    o_queue;
5354     cl_event event;
5355     cl_int err;
5356     ERL_NIF_TERM t;
5357     UNUSED(argc);
5358 
5359     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5360 	return enif_make_badarg(env);
5361     if (o_queue->version >= 12) {
5362 	err = eclEnqueueMarker(o_queue->queue, &event);
5363     } else { // deprecated in 1.2 available in 1.1
5364 	err = ECL_CALL(clEnqueueMarker)(o_queue->queue, &event);
5365     }
5366     if (!err) {
5367 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5368 	return enif_make_tuple2(env, ATOM(ok), t);
5369     }
5370     return ecl_make_error(env, err);
5371 }
5372 
5373 // 1.2 -> 1.1 wrapper: clEnqueueMarkerWithWaitList implement clEnqueueWaitForEvents
eclEnqueueWaitForEvents(cl_command_queue queue,cl_uint num_events,const cl_event * event_list)5374 cl_int CL_CALLBACK eclEnqueueWaitForEvents(cl_command_queue queue,
5375 			       cl_uint num_events,
5376 			       const cl_event * event_list)
5377 {
5378     return ECL_CALL(clEnqueueMarkerWithWaitList)(queue,
5379 						 num_events,
5380 						 num_events ? event_list : NULL,
5381 						 NULL);
5382 }
5383 
5384 //
5385 // cl:enqueue_wait_for_events(Queue::cl_queue(), WaitList::[cl_event()]) ->
5386 //    'ok' | {'error', cl_error()}
5387 //
ecl_enqueue_wait_for_events(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5388 static ERL_NIF_TERM ecl_enqueue_wait_for_events(ErlNifEnv* env, int argc,
5389 						const ERL_NIF_TERM argv[])
5390 {
5391     ecl_object_t* o_queue;
5392     cl_event      wait_list[MAX_WAIT_LIST];
5393     cl_uint       num_events = MAX_WAIT_LIST;
5394     cl_int        err;
5395     UNUSED(argc);
5396 
5397     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5398 	return enif_make_badarg(env);
5399     if (!get_object_list(env, argv[1], &event_r, false,
5400 			 (void**) wait_list, &num_events))
5401 	return enif_make_badarg(env);
5402     if(o_queue->version >= 12) {
5403 	err = eclEnqueueWaitForEvents(o_queue->queue,
5404 				      num_events,
5405 				      num_events ? wait_list : NULL);
5406     } else {
5407 	err = ECL_CALL(clEnqueueWaitForEvents)(o_queue->queue,
5408 					       num_events,
5409 					       num_events ? wait_list : NULL);
5410     }
5411     if (!err)
5412 	return ATOM(ok);
5413     return ecl_make_error(env, err);
5414 }
5415 //
5416 // cl:enqueue_read_buffer(Queue::cl_queue(), Buffer::cl_mem(),
5417 //                        Offset::non_neg_integer(),
5418 //                           Size::non_neg_integer(),
5419 //                           WaitList::[cl_event()]) ->
5420 //    {'ok', cl_event()} | {'error', cl_error()}
ecl_enqueue_read_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5421 static ERL_NIF_TERM ecl_enqueue_read_buffer(ErlNifEnv* env, int argc,
5422 					    const ERL_NIF_TERM argv[])
5423 {
5424     ecl_object_t*    o_queue;
5425     cl_mem           buffer;
5426     size_t           offset;
5427     size_t           size;
5428     cl_event         wait_list[MAX_WAIT_LIST];
5429     cl_uint          num_events = MAX_WAIT_LIST;
5430     cl_event         event;
5431     ErlNifBinary*    bin;
5432     cl_int           err;
5433     UNUSED(argc);
5434 
5435     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5436 	return enif_make_badarg(env);
5437     if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5438 	return enif_make_badarg(env);
5439     if (!ecl_get_sizet(env, argv[2], &offset))
5440 	return enif_make_badarg(env);
5441     if (!ecl_get_sizet(env, argv[3], &size))
5442 	return enif_make_badarg(env);
5443     if (!get_object_list(env, argv[4], &event_r, false,
5444 			 (void**) wait_list, &num_events))
5445 	return enif_make_badarg(env);
5446     if (!(bin = enif_alloc(sizeof(ErlNifBinary))))
5447 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
5448     if (!enif_alloc_binary(size, bin)) {
5449 	enif_free(bin);
5450 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
5451     }
5452     err = ECL_CALL(clEnqueueReadBuffer)
5453 	(o_queue->queue, buffer,
5454 	 CL_FALSE,
5455 	 offset,
5456 	 size,
5457 	 bin->data,
5458 	 num_events,
5459 	 num_events ? wait_list : 0,
5460 	 &event);
5461     if (!err) {
5462 	ERL_NIF_TERM t;
5463 	t = ecl_make_event(env, event, true, false, 0, bin, o_queue);
5464 	return enif_make_tuple2(env, ATOM(ok), t);
5465     }
5466     else {
5467 	enif_free(bin);
5468 	return ecl_make_error(env, err);
5469     }
5470 }
5471 //
5472 // cl:enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(),
5473 //                         Offset::non_neg_integer(),
5474 //                         Size::non_neg_integer(),
5475 //                         Data::binary(),
5476 //                         WaitList::[cl_event()],
5477 //                         WantEvent::boolean()
5478 //                       ) ->
5479 //    {'ok', cl_event()} | {'error', cl_error()}
5480 //
ecl_enqueue_write_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5481 static ERL_NIF_TERM ecl_enqueue_write_buffer(ErlNifEnv* env, int argc,
5482 					     const ERL_NIF_TERM argv[])
5483 {
5484     ecl_object_t*    o_queue;
5485     cl_mem           buffer;
5486     size_t           offset;
5487     size_t           size;
5488     cl_event         wait_list[MAX_WAIT_LIST];
5489     cl_uint          num_events = MAX_WAIT_LIST;
5490     cl_event         event;
5491     ErlNifBinary     bin;
5492     ErlNifEnv*       bin_env;
5493     cl_int           err;
5494     cl_bool          want_event;
5495     UNUSED(argc);
5496 
5497     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5498 	return enif_make_badarg(env);
5499     if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5500 	return enif_make_badarg(env);
5501     if (!ecl_get_sizet(env, argv[2], &offset))
5502 	return enif_make_badarg(env);
5503     if (!ecl_get_sizet(env, argv[3], &size))
5504 	return enif_make_badarg(env);
5505     /*  Check argv[4] (bin) last */
5506     if (!get_object_list(env, argv[5], &event_r, false,
5507 			 (void**) wait_list, &num_events))
5508 	return enif_make_badarg(env);
5509     if (!get_bool(env, argv[6], &want_event))
5510 	return enif_make_badarg(env);
5511 
5512     if (!(bin_env = enif_alloc_env())) {  // create binary environment
5513 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
5514     }
5515     if (!ecl_make_binary(env, argv[4], bin_env, &bin)) {
5516 	enif_free_env(bin_env);
5517 	return enif_make_badarg(env);
5518     }
5519 
5520     // handle binary and iolist as binary
5521     if (bin.size < size) {   // FIXME: handle offset!
5522 	return enif_make_badarg(env);
5523     }
5524 
5525     err = ECL_CALL(clEnqueueWriteBuffer)(o_queue->queue, buffer,
5526 			       !want_event, // FALSE for async
5527 			       offset,
5528 			       size,
5529 			       bin.data,
5530 			       num_events,
5531 			       num_events ? wait_list : NULL,
5532 			       want_event ? &event : NULL);
5533     if (!err) {
5534 	if (want_event) {
5535 	    ERL_NIF_TERM t;
5536 	    t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue);
5537 	    return enif_make_tuple2(env, ATOM(ok), t);
5538 	} else {
5539 	    enif_free_env(bin_env);
5540 	}
5541 	return ATOM(ok);
5542     }
5543     else {
5544 	enif_free_env(bin_env);
5545 	return ecl_make_error(env, err);
5546     }
5547 }
5548 
5549 //
5550 // enqueue_read_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch,
5551 //		   _WaitList) -> {'ok',Event} | {error,Error}
5552 //
ecl_enqueue_read_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5553 static ERL_NIF_TERM ecl_enqueue_read_image(ErlNifEnv* env, int argc,
5554 					   const ERL_NIF_TERM argv[])
5555 {
5556     ecl_object_t*    o_queue;
5557     cl_mem           buffer;
5558     size_t           origin[3];
5559     size_t           region[3];
5560     size_t           row_pitch;
5561     size_t           slice_pitch;
5562     cl_event         wait_list[MAX_WAIT_LIST];
5563     cl_uint          num_events = MAX_WAIT_LIST;
5564     size_t           num_origin = 3;
5565     size_t           num_region = 3;
5566     size_t           psize;
5567     size_t           size;
5568     cl_event         event;
5569     ErlNifBinary*    bin;
5570     cl_int           err;
5571     UNUSED(argc);
5572 
5573     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5574 	return enif_make_badarg(env);
5575     if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5576 	return enif_make_badarg(env);
5577     origin[0] = origin[1] = origin[2] = 0;
5578     if (!get_sizet_list(env, argv[2], origin, &num_origin))
5579 	return enif_make_badarg(env);
5580     region[0] = region[1] = region[2] = 1;
5581     if (!get_sizet_list(env, argv[3], region, &num_region))
5582 	return enif_make_badarg(env);
5583     if (!ecl_get_sizet(env, argv[4], &row_pitch))
5584 	return enif_make_badarg(env);
5585     if (!ecl_get_sizet(env, argv[5], &slice_pitch))
5586 	return enif_make_badarg(env);
5587     if (!get_object_list(env, argv[6], &event_r, false,
5588 			 (void**) wait_list, &num_events))
5589 	return enif_make_badarg(env);
5590     if (!(bin = enif_alloc(sizeof(ErlNifBinary))))
5591 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
5592 
5593     // calculate the read size of the image, FIXME: check error return
5594     clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, sizeof(psize), &psize, 0);
5595     size = region[0]*region[1]*region[2]*psize;
5596     if (!enif_alloc_binary(size, bin)) {
5597 	enif_free(bin);
5598 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
5599     }
5600     err = ECL_CALL(clEnqueueReadImage)(o_queue->queue, buffer,
5601 			     CL_FALSE,
5602 			     origin,
5603 			     region,
5604 			     row_pitch,
5605 			     slice_pitch,
5606 			     bin->data,
5607 			     num_events,
5608 			     num_events ? wait_list : 0,
5609 			     &event);
5610     if (!err) {
5611 	ERL_NIF_TERM t;
5612 	t = ecl_make_event(env, event, true, false, 0, bin, o_queue);
5613 	return enif_make_tuple2(env, ATOM(ok), t);
5614     }
5615     else {
5616 	enif_free(bin);
5617 	return ecl_make_error(env, err);
5618     }
5619 }
5620 
5621 //
5622 // enqueue_read_buffer_rect(_Queue, _Buffer,
5623 //    BufferOrigin, HostOrigin, Region,
5624 //    BufferRowPitch, BufferSlicePitch,
5625 //    HostRowPitch, HostSlicePitch,
5626 //    WaitList) -> {'ok',Event} | {error,Error}
5627 //
5628 #if CL_VERSION_1_1 == 1
ecl_enqueue_read_buffer_rect(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5629 static ERL_NIF_TERM ecl_enqueue_read_buffer_rect(ErlNifEnv* env, int argc,
5630 						 const ERL_NIF_TERM argv[])
5631 {
5632     ecl_object_t*    o_queue;
5633     cl_mem           buffer;
5634     size_t           buffer_origin[3];
5635     size_t           host_origin[3];
5636     size_t           region[3];
5637     size_t           buffer_row_pitch;
5638     size_t           buffer_slice_pitch;
5639     size_t           host_row_pitch;
5640     size_t           host_slice_pitch;
5641     cl_event         wait_list[MAX_WAIT_LIST];
5642     cl_uint          num_events = MAX_WAIT_LIST;
5643     size_t           num_buffer_origin = 3;
5644     size_t           num_host_origin = 3;
5645     size_t           num_region = 3;
5646     size_t           size;
5647     cl_event         event;
5648     ErlNifBinary*    bin;
5649     cl_int           err;
5650     UNUSED(argc);
5651 
5652     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5653 	return enif_make_badarg(env);
5654     if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5655 	return enif_make_badarg(env);
5656     buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0;
5657     if (!get_sizet_list(env, argv[2], buffer_origin, &num_buffer_origin))
5658 	return enif_make_badarg(env);
5659     host_origin[0] = host_origin[1] = host_origin[2] = 0;
5660     if (!get_sizet_list(env, argv[3], host_origin, &num_host_origin))
5661 	return enif_make_badarg(env);
5662     region[0] = region[1] = region[2] = 1;
5663     if (!get_sizet_list(env, argv[4], region, &num_region))
5664 	return enif_make_badarg(env);
5665     if (!ecl_get_sizet(env, argv[5], &buffer_row_pitch))
5666 	return enif_make_badarg(env);
5667     if (!ecl_get_sizet(env, argv[6], &buffer_slice_pitch))
5668 	return enif_make_badarg(env);
5669     if (!ecl_get_sizet(env, argv[7], &host_row_pitch))
5670 	return enif_make_badarg(env);
5671     if (!ecl_get_sizet(env, argv[8], &host_slice_pitch))
5672 	return enif_make_badarg(env);
5673     if (!get_object_list(env, argv[9], &event_r, false,
5674 			 (void**) wait_list, &num_events))
5675 	return enif_make_badarg(env);
5676     if (!(bin = enif_alloc(sizeof(ErlNifBinary))))
5677 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
5678 
5679     // calculate the read size of the image, FIXME: check error return
5680     size = (host_origin[0]+region[0])*(host_origin[1]+region[1])*
5681 	(host_origin[2]+region[2]);
5682     if (!enif_alloc_binary(size, bin)) {
5683 	enif_free(bin);
5684 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
5685     }
5686     err = ECL_CALL(clEnqueueReadBufferRect)(o_queue->queue, buffer,
5687 				  CL_FALSE,
5688 				  buffer_origin,
5689 				  host_origin,
5690 				  region,
5691 				  buffer_row_pitch,
5692 				  buffer_slice_pitch,
5693 				  host_row_pitch,
5694 				  host_slice_pitch,
5695 				  bin->data,
5696 				  num_events,
5697 				  num_events ? wait_list : 0,
5698 				  &event);
5699     if (!err) {
5700 	ERL_NIF_TERM t;
5701 	t = ecl_make_event(env, event, true, false, 0, bin, o_queue);
5702 	return enif_make_tuple2(env, ATOM(ok), t);
5703     }
5704     else {
5705 	enif_free(bin);
5706 	return ecl_make_error(env, err);
5707     }
5708 }
5709 #endif
5710 
5711 
5712 //
5713 // enqueue_write_buffer_rect(_Queue, _Buffer,
5714 //    BufferOrigin, HostOrigin, Region,
5715 //    BufferRowPitch, BufferSlicePitch,
5716 //    HostRowPitch, HostSlicePitch,
5717 //    Data::binary(),
5718 //    WaitList) -> {'ok',Event} | {error,Error}
5719 //
5720 #if CL_VERSION_1_1 == 1
ecl_enqueue_write_buffer_rect(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5721 static ERL_NIF_TERM ecl_enqueue_write_buffer_rect(ErlNifEnv* env, int argc,
5722 						  const ERL_NIF_TERM argv[])
5723 {
5724     ecl_object_t*    o_queue;
5725     cl_mem           buffer;
5726     size_t           buffer_origin[3];
5727     size_t           host_origin[3];
5728     size_t           region[3];
5729     size_t           buffer_row_pitch;
5730     size_t           buffer_slice_pitch;
5731     size_t           host_row_pitch;
5732     size_t           host_slice_pitch;
5733     cl_event         wait_list[MAX_WAIT_LIST];
5734     cl_uint          num_events = MAX_WAIT_LIST;
5735     size_t           num_buffer_origin = 3;
5736     size_t           num_host_origin = 3;
5737     size_t           num_region = 3;
5738     size_t           size;
5739     cl_event         event;
5740     ErlNifBinary     bin;
5741     ErlNifEnv*       bin_env;
5742     cl_int           err;
5743     UNUSED(argc);
5744 
5745     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5746 	return enif_make_badarg(env);
5747     if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5748 	return enif_make_badarg(env);
5749     buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0;
5750     if (!get_sizet_list(env, argv[2], buffer_origin, &num_buffer_origin))
5751 	return enif_make_badarg(env);
5752     host_origin[0] = host_origin[1] = host_origin[2] = 0;
5753     if (!get_sizet_list(env, argv[3], host_origin, &num_host_origin))
5754 	return enif_make_badarg(env);
5755     region[0] = region[1] = region[2] = 1;
5756     if (!get_sizet_list(env, argv[4], region, &num_region))
5757 	return enif_make_badarg(env);
5758 
5759     if (!ecl_get_sizet(env, argv[5], &buffer_row_pitch))
5760 	return enif_make_badarg(env);
5761     if (!ecl_get_sizet(env, argv[6], &buffer_slice_pitch))
5762 	return enif_make_badarg(env);
5763 
5764     if (!ecl_get_sizet(env, argv[7], &host_row_pitch))
5765 	return enif_make_badarg(env);
5766     if (!ecl_get_sizet(env, argv[8], &host_slice_pitch))
5767 	return enif_make_badarg(env);
5768     /*  Check argv[9] (bin) last */
5769     if (!get_object_list(env, argv[10], &event_r, false,
5770 			 (void**) wait_list, &num_events))
5771 	return enif_make_badarg(env);
5772 
5773     if (!(bin_env = enif_alloc_env())) {  // create binary environment
5774 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
5775     }
5776     if (!ecl_make_binary(env, argv[9], bin_env, &bin)) {
5777 	enif_free_env(bin_env);
5778 	return enif_make_badarg(env);
5779     }
5780     size = (host_origin[0]+region[0])*(host_origin[1]+region[1])*
5781 	(host_origin[2]+region[2]);
5782     // handle binary and iolist as binary
5783     if (bin.size < size) {   // FIXME: handle offset!
5784 	return enif_make_badarg(env);
5785     }
5786     err = ECL_CALL(clEnqueueWriteBufferRect)(o_queue->queue, buffer,
5787 				   CL_FALSE,
5788 				   buffer_origin,
5789 				   host_origin,
5790 				   region,
5791 				   buffer_row_pitch,
5792 				   buffer_slice_pitch,
5793 				   host_row_pitch,
5794 				   host_slice_pitch,
5795 				   bin.data,
5796 				   num_events,
5797 				   num_events ? wait_list : 0,
5798 				   &event);
5799     if (!err) {
5800 	ERL_NIF_TERM t;
5801 	t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue);
5802 	return enif_make_tuple2(env, ATOM(ok), t);
5803     }
5804     else {
5805 	enif_free_env(bin_env);
5806 	return ecl_make_error(env, err);
5807     }
5808 }
5809 #endif
5810 
5811 //
5812 // cl:enqueue_fill_buffer(Queue, Buffer, Pattern, Offset, Size, WaitList) ->
5813 //   {ok,Event} | {error,Reason}
5814 //
5815 #if CL_VERSION_1_2 == 1
ecl_enqueue_fill_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5816 static ERL_NIF_TERM ecl_enqueue_fill_buffer(ErlNifEnv* env, int argc,
5817 					    const ERL_NIF_TERM argv[])
5818 {
5819     ecl_object_t*    o_queue;
5820     cl_mem           buffer;
5821     ErlNifBinary     pattern;
5822     size_t           offset;
5823     size_t           size;
5824     cl_event         wait_list[MAX_WAIT_LIST];
5825     cl_uint          num_events = MAX_WAIT_LIST;
5826     cl_event         event;
5827     cl_int           err;
5828     UNUSED(argc);
5829 
5830     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5831 	return enif_make_badarg(env);
5832     if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5833 	return enif_make_badarg(env);
5834     if (!enif_inspect_binary(env, argv[2], &pattern))
5835 	return enif_make_badarg(env);
5836     if (!ecl_get_sizet(env, argv[3], &offset))
5837 	return enif_make_badarg(env);
5838     if (!ecl_get_sizet(env, argv[4], &size))
5839 	return enif_make_badarg(env);
5840     if (!get_object_list(env, argv[5], &event_r, false,
5841 			 (void**) wait_list, &num_events))
5842 	return enif_make_badarg(env);
5843 
5844     // Note: pattern must not be retained, it can be freed after this call
5845     // according to spec.
5846     err = ECL_CALL(clEnqueueFillBuffer)(o_queue->queue, buffer,
5847 			      pattern.data,
5848 			      pattern.size,
5849 			      offset,
5850 			      size,
5851 			      num_events,
5852 			      num_events ? wait_list : 0,
5853 			      &event);
5854     if (!err) {
5855 	ERL_NIF_TERM t;
5856 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5857 	return enif_make_tuple2(env, ATOM(ok), t);
5858     }
5859     return ecl_make_error(env, err);
5860 }
5861 #endif
5862 
5863 
5864 //
5865 // enqueue_write_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch,
5866 //		    _Data, _WaitList, _WantEvent) ->
5867 //
ecl_enqueue_write_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5868 static ERL_NIF_TERM ecl_enqueue_write_image(ErlNifEnv* env, int argc,
5869 					    const ERL_NIF_TERM argv[])
5870 {
5871     ecl_object_t*    o_queue;
5872     cl_mem           buffer;
5873     size_t           origin[3];
5874     size_t           region[3];
5875     size_t           row_pitch;
5876     size_t           slice_pitch;
5877     cl_event         wait_list[MAX_WAIT_LIST];
5878     cl_uint          num_events = MAX_WAIT_LIST;
5879     size_t           num_origin = 3;
5880     size_t           num_region = 3;
5881     size_t           psize;
5882     size_t           size;
5883     cl_event         event;
5884     ErlNifBinary     bin;
5885     ErlNifEnv*       bin_env;
5886     cl_int           err;
5887     cl_bool          want_event;
5888     UNUSED(argc);
5889 
5890     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5891 	return enif_make_badarg(env);
5892     if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5893 	return enif_make_badarg(env);
5894     origin[0] = origin[1] = origin[2] = 0;
5895     if (!get_sizet_list(env, argv[2], origin, &num_origin))
5896 	return enif_make_badarg(env);
5897     region[0] = region[1] = region[2] = 1;
5898     if (!get_sizet_list(env, argv[3], region, &num_region))
5899 	return enif_make_badarg(env);
5900     if (!ecl_get_sizet(env, argv[4], &row_pitch))
5901 	return enif_make_badarg(env);
5902     if (!ecl_get_sizet(env, argv[5], &slice_pitch))
5903 	return enif_make_badarg(env);
5904     /*  Check argv[6] (bin) last */
5905     if (!get_object_list(env, argv[7], &event_r, false,
5906 			 (void**) wait_list, &num_events))
5907 	return enif_make_badarg(env);
5908     if (!get_bool(env, argv[8], &want_event))
5909         return enif_make_badarg(env);
5910     if (!(bin_env = enif_alloc_env())) {  // create binary environment
5911         return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
5912     }
5913     if (!ecl_make_binary(env, argv[6], bin_env, &bin)) {
5914        enif_free_env(bin_env);
5915        return enif_make_badarg(env);
5916     }
5917 
5918     // calculate the read size of the image FIXME: check error return
5919     clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, sizeof(psize), &psize, 0);
5920     size = region[0]*region[1]*region[2]*psize;
5921     if (bin.size < size) {
5922 	return enif_make_badarg(env);
5923     }
5924 
5925     err = ECL_CALL(clEnqueueWriteImage)(o_queue->queue, buffer,
5926 			      !want_event, // FALSE for ASYNC
5927 			      origin,
5928 			      region,
5929 			      row_pitch,
5930 			      slice_pitch,
5931 			      bin.data,
5932 			      num_events,
5933 			      num_events ? wait_list : NULL,
5934 			      want_event ? &event : NULL );
5935     if (!err) {
5936 	if (want_event) {
5937 	    ERL_NIF_TERM t;
5938 	    t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue);
5939 	    return enif_make_tuple2(env, ATOM(ok), t);
5940 	} else {
5941 	    enif_free_env(bin_env);
5942 	}
5943 	return ATOM(ok);
5944     }
5945     else {
5946 	enif_free_env(bin_env);
5947 	return ecl_make_error(env, err);
5948     }
5949 }
5950 
5951 //
5952 // cl:enqueue_copy_buffer(Queue, SrcBuffer, DstBuffer,
5953 //                        SrcOffset, DstOffset, Cb,
5954 //                        WaitList) ->
5955 //
ecl_enqueue_copy_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5956 static ERL_NIF_TERM ecl_enqueue_copy_buffer(ErlNifEnv* env, int argc,
5957 					    const ERL_NIF_TERM argv[])
5958 {
5959     ecl_object_t*    o_queue;
5960     cl_mem           src_buffer;
5961     cl_mem           dst_buffer;
5962     size_t           src_offset;
5963     size_t           dst_offset;
5964     size_t           cb;
5965     cl_event         wait_list[MAX_WAIT_LIST];
5966     cl_uint          num_events = MAX_WAIT_LIST;
5967     cl_event         event;
5968     cl_int           err;
5969     UNUSED(argc);
5970 
5971     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5972 	return enif_make_badarg(env);
5973     if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer))
5974 	return enif_make_badarg(env);
5975     if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer))
5976 	return enif_make_badarg(env);
5977     if (!ecl_get_sizet(env, argv[3], &src_offset))
5978 	return enif_make_badarg(env);
5979     if (!ecl_get_sizet(env, argv[4], &dst_offset))
5980 	return enif_make_badarg(env);
5981     if (!ecl_get_sizet(env, argv[5], &cb))
5982 	return enif_make_badarg(env);
5983     if (!get_object_list(env, argv[6], &event_r, false,
5984 			 (void**) wait_list, &num_events))
5985 	return enif_make_badarg(env);
5986     err = ECL_CALL(clEnqueueCopyBuffer)(o_queue->queue,
5987 			      src_buffer,
5988 			      dst_buffer,
5989 			      src_offset,
5990 			      dst_offset,
5991 			      cb,
5992 			      num_events,
5993 			      num_events ? wait_list : 0,
5994 			      &event);
5995     if (!err) {
5996 	ERL_NIF_TERM t;
5997 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5998 	return enif_make_tuple2(env, ATOM(ok), t);
5999     }
6000     return ecl_make_error(env, err);
6001 }
6002 
6003 //
6004 // enqueue_copy_buffer_rect(_Queue, _SrcBuffer, _DstBuffer,
6005 //    SrcOrigin, DstOrigin, Region,
6006 //    SrcRowPitch, SrcSlicePitch,
6007 //    DstRowPitch, DstSlicePitch,
6008 //    WaitList) -> {'ok',Event} | {error,Error}
6009 //
6010 #if CL_VERSION_1_1 == 1
ecl_enqueue_copy_buffer_rect(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6011 static ERL_NIF_TERM ecl_enqueue_copy_buffer_rect(ErlNifEnv* env, int argc,
6012 						 const ERL_NIF_TERM argv[])
6013 {
6014     ecl_object_t*    o_queue;
6015     cl_mem           src_buffer;
6016     cl_mem           dst_buffer;
6017     size_t           src_origin[3];
6018     size_t           dst_origin[3];
6019     size_t           region[3];
6020     size_t           src_row_pitch;
6021     size_t           src_slice_pitch;
6022     size_t           dst_row_pitch;
6023     size_t           dst_slice_pitch;
6024     cl_event         wait_list[MAX_WAIT_LIST];
6025     cl_uint          num_events = MAX_WAIT_LIST;
6026     size_t           num_src_origin = 3;
6027     size_t           num_dst_origin = 3;
6028     size_t           num_region = 3;
6029     cl_event         event;
6030     ErlNifBinary*    bin;
6031     cl_int           err;
6032     UNUSED(argc);
6033 
6034     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6035 	return enif_make_badarg(env);
6036     if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer))
6037 	return enif_make_badarg(env);
6038     if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer))
6039 	return enif_make_badarg(env);
6040     src_origin[0] = src_origin[1] = src_origin[2] = 0;
6041     if (!get_sizet_list(env, argv[3], src_origin, &num_src_origin))
6042 	return enif_make_badarg(env);
6043     dst_origin[0] = dst_origin[1] = dst_origin[2] = 0;
6044     if (!get_sizet_list(env, argv[4], dst_origin, &num_dst_origin))
6045 	return enif_make_badarg(env);
6046     region[0] = region[1] = region[2] = 1;
6047     if (!get_sizet_list(env, argv[5], region, &num_region))
6048 	return enif_make_badarg(env);
6049 
6050     if (!ecl_get_sizet(env, argv[6], &src_row_pitch))
6051 	return enif_make_badarg(env);
6052     if (!ecl_get_sizet(env, argv[7], &src_slice_pitch))
6053 	return enif_make_badarg(env);
6054 
6055     if (!ecl_get_sizet(env, argv[8], &dst_row_pitch))
6056 	return enif_make_badarg(env);
6057     if (!ecl_get_sizet(env, argv[9], &dst_slice_pitch))
6058 	return enif_make_badarg(env);
6059 
6060     if (!get_object_list(env, argv[10], &event_r, false,
6061 			 (void**) wait_list, &num_events))
6062 	return enif_make_badarg(env);
6063     if (!(bin = enif_alloc(sizeof(ErlNifBinary))))
6064 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
6065 
6066     err = ECL_CALL(clEnqueueCopyBufferRect)(o_queue->queue,
6067 				  src_buffer, dst_buffer,
6068 				  src_origin, dst_origin,
6069 				  region,
6070 				  src_row_pitch, src_slice_pitch,
6071 				  dst_row_pitch, dst_slice_pitch,
6072 				  num_events,
6073 				  num_events ? wait_list : 0,
6074 				  &event);
6075     if (!err) {
6076 	ERL_NIF_TERM t;
6077 	t = ecl_make_event(env, event, true, false, 0, bin, o_queue);
6078 	return enif_make_tuple2(env, ATOM(ok), t);
6079     }
6080     else {
6081 	enif_free(bin);
6082 	return ecl_make_error(env, err);
6083     }
6084 }
6085 #endif
6086 
ecl_enqueue_copy_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6087 static ERL_NIF_TERM ecl_enqueue_copy_image(ErlNifEnv* env, int argc,
6088 					   const ERL_NIF_TERM argv[])
6089 {
6090     ecl_object_t*    o_queue;
6091     cl_mem           src_image;
6092     cl_mem           dst_image;
6093     size_t           src_origin[3];
6094     size_t           dst_origin[3];
6095     size_t           region[3];
6096     cl_event         wait_list[MAX_WAIT_LIST];
6097     cl_uint          num_events = MAX_WAIT_LIST;
6098     size_t           num_src_origin = 3;
6099     size_t           num_dst_origin = 3;
6100     size_t           num_region = 3;
6101     cl_event         event;
6102     cl_int           err;
6103     UNUSED(argc);
6104 
6105     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6106 	return enif_make_badarg(env);
6107     if (!get_object(env, argv[1], &mem_r, false, (void**)&src_image))
6108 	return enif_make_badarg(env);
6109     if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_image))
6110 	return enif_make_badarg(env);
6111     src_origin[0] = src_origin[1] = src_origin[2] = 0;
6112     if (!get_sizet_list(env, argv[3], src_origin, &num_src_origin))
6113 	return enif_make_badarg(env);
6114     dst_origin[0] = dst_origin[1] = dst_origin[2] = 0;
6115     if (!get_sizet_list(env, argv[4], dst_origin, &num_dst_origin))
6116 	return enif_make_badarg(env);
6117     region[0] = region[1] = region[2] = 1;
6118     if (!get_sizet_list(env, argv[5], region, &num_region))
6119 	return enif_make_badarg(env);
6120     if (!get_object_list(env, argv[6], &event_r, false,
6121 			 (void**) wait_list, &num_events))
6122 	return enif_make_badarg(env);
6123     err = ECL_CALL(clEnqueueCopyImage)(o_queue->queue, src_image, dst_image,
6124 			     src_origin,
6125 			     dst_origin,
6126 			     region,
6127 			     num_events,
6128 			     num_events ? wait_list : 0,
6129 			     &event);
6130     if (!err) {
6131 	ERL_NIF_TERM t;
6132 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6133 	return enif_make_tuple2(env, ATOM(ok), t);
6134     }
6135     return ecl_make_error(env, err);
6136 }
6137 
6138 //
6139 //  cl:enqueue_fill_image(Queue,Image,FillColor,Origin,Region,WaitList) ->
6140 //  FillColor = <<R:32/unsigned,G:32/unsigned,B:32/unsigned,A:32/unsigned>>
6141 //            | <<R:32/signed,G:32/signed,B:32/signed,A:32/signed>>
6142 //            | <<R:32/float,G:32/float,B:32/float,A:32/float>>
6143 //            Use device endian! check device_info(D, endian_little)
6144 //
6145 //
6146 #if CL_VERSION_1_2 == 1
6147 
ecl_enqueue_fill_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6148 static ERL_NIF_TERM ecl_enqueue_fill_image(ErlNifEnv* env, int argc,
6149 					   const ERL_NIF_TERM argv[])
6150 {
6151     ecl_object_t*    o_queue;
6152     cl_mem           image;
6153     ErlNifBinary     fill_color;
6154     size_t           origin[3];
6155     size_t           region[3];
6156     cl_event         wait_list[MAX_WAIT_LIST];
6157     cl_uint          num_events = MAX_WAIT_LIST;
6158     size_t           num_origin = 3;
6159     size_t           num_region = 3;
6160     cl_event         event;
6161     cl_int           err;
6162     UNUSED(argc);
6163 
6164     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6165 	return enif_make_badarg(env);
6166     if (!get_object(env, argv[1], &mem_r, false, (void**)&image))
6167 	return enif_make_badarg(env);
6168     if (!enif_inspect_binary(env, argv[2], &fill_color))
6169 	return enif_make_badarg(env);
6170     if (fill_color.size != 4*4)
6171 	return enif_make_badarg(env);
6172     origin[0] = origin[1] = origin[2] = 0;
6173     if (!get_sizet_list(env, argv[3], origin, &num_origin))
6174 	return enif_make_badarg(env);
6175     region[0] = region[1] = region[2] = 1;
6176     if (!get_sizet_list(env, argv[4], region, &num_region))
6177 	return enif_make_badarg(env);
6178     if (!get_object_list(env, argv[5], &event_r, false,
6179 			 (void**) wait_list, &num_events))
6180 	return enif_make_badarg(env);
6181 
6182     err = ECL_CALL(clEnqueueFillImage)(o_queue->queue, image,
6183 			     fill_color.data, // validate size etc!
6184 			     origin,
6185 			     region,
6186 			     num_events,
6187 			     num_events ? wait_list : 0,
6188 			     &event);
6189     if (!err) {
6190 	ERL_NIF_TERM t;
6191 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6192 	return enif_make_tuple2(env, ATOM(ok), t);
6193     }
6194     return ecl_make_error(env, err);
6195 }
6196 #endif
6197 
6198 // cl:enqueue_copy_image_to_buffer(_Queue, _SrcImage, _DstBuffer,
6199 //                                 _Origin, _Region,
6200 //			           _DstOffset, _WaitList) ->
ecl_enqueue_copy_image_to_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6201 static ERL_NIF_TERM ecl_enqueue_copy_image_to_buffer(ErlNifEnv* env, int argc,
6202 						     const ERL_NIF_TERM argv[])
6203 {
6204     ecl_object_t*    o_queue;
6205     cl_mem           src_image;
6206     cl_mem           dst_buffer;
6207     size_t           origin[3];
6208     size_t           region[3];
6209     size_t           dst_offset;
6210     cl_event         wait_list[MAX_WAIT_LIST];
6211     cl_uint          num_events = MAX_WAIT_LIST;
6212     size_t           num_src_origin = 3;
6213     size_t           num_region = 3;
6214     cl_event         event;
6215     cl_int           err;
6216     UNUSED(argc);
6217 
6218     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6219 	return enif_make_badarg(env);
6220     if (!get_object(env, argv[1], &mem_r, false, (void**)&src_image))
6221 	return enif_make_badarg(env);
6222     if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer))
6223 	return enif_make_badarg(env);
6224     origin[0] =  origin[1] = origin[2] = 0;
6225     if (!get_sizet_list(env, argv[3], origin, &num_src_origin))
6226 	return enif_make_badarg(env);
6227     region[0] = region[1] = region[2] = 1;
6228     if (!get_sizet_list(env, argv[4], region, &num_region))
6229 	return enif_make_badarg(env);
6230     if (!ecl_get_sizet(env, argv[5], &dst_offset))
6231 	return enif_make_badarg(env);
6232     if (!get_object_list(env, argv[6], &event_r, false,
6233 			 (void**) wait_list, &num_events))
6234 	return enif_make_badarg(env);
6235     err = ECL_CALL(clEnqueueCopyImageToBuffer)(o_queue->queue,
6236 				     src_image,
6237 				     dst_buffer,
6238 				     origin,
6239 				     region,
6240 				     dst_offset,
6241 				     num_events,
6242 				     num_events ? wait_list : 0,
6243 				     &event);
6244     if (!err) {
6245 	ERL_NIF_TERM t;
6246 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6247 	return enif_make_tuple2(env, ATOM(ok), t);
6248     }
6249     return ecl_make_error(env, err);
6250 }
6251 //
6252 // cl:enqueue_copy_buffer_to_image(_Queue, _SrcBuffer, _DstImage,
6253 //                                  _SrcOffset, _DstOrigin,
6254 //                                _Region, _WaitList) ->
6255 //
ecl_enqueue_copy_buffer_to_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6256 static ERL_NIF_TERM ecl_enqueue_copy_buffer_to_image(ErlNifEnv* env, int argc,
6257 						     const ERL_NIF_TERM argv[])
6258 {
6259     ecl_object_t*    o_queue;
6260     cl_mem           src_buffer;
6261     cl_mem           dst_image;
6262     size_t           src_offset;
6263     size_t           origin[3];
6264     size_t           region[3];
6265     cl_event         wait_list[MAX_WAIT_LIST];
6266     cl_uint          num_events = MAX_WAIT_LIST;
6267     size_t           num_src_origin = 3;
6268     size_t           num_region = 3;
6269     cl_event         event;
6270     cl_int           err;
6271     UNUSED(argc);
6272 
6273     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6274 	return enif_make_badarg(env);
6275     if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer))
6276 	return enif_make_badarg(env);
6277     if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_image))
6278 	return enif_make_badarg(env);
6279     if (!ecl_get_sizet(env, argv[3], &src_offset))
6280 	return enif_make_badarg(env);
6281     origin[0] =  origin[1] = origin[2] = 0;
6282     if (!get_sizet_list(env, argv[4], origin, &num_src_origin))
6283 	return enif_make_badarg(env);
6284     region[0] = region[1] = region[2] = 1;
6285     if (!get_sizet_list(env, argv[5], region, &num_region))
6286 	return enif_make_badarg(env);
6287     if (!get_object_list(env, argv[6], &event_r, false,
6288 			 (void**) wait_list, &num_events))
6289 	return enif_make_badarg(env);
6290     err = ECL_CALL(clEnqueueCopyBufferToImage)(o_queue->queue,
6291 				     src_buffer,
6292 				     dst_image,
6293 				     src_offset,
6294 				     origin,
6295 				     region,
6296 				     num_events,
6297 				     num_events ? wait_list : 0,
6298 				     &event);
6299     if (!err) {
6300 	ERL_NIF_TERM t;
6301 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6302 	return enif_make_tuple2(env, ATOM(ok), t);
6303     }
6304     return ecl_make_error(env, err);
6305 }
6306 
ecl_enqueue_map_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6307 static ERL_NIF_TERM ecl_enqueue_map_buffer(ErlNifEnv* env, int argc,
6308 					   const ERL_NIF_TERM argv[])
6309 {
6310     ecl_object_t*    o_queue;
6311     cl_mem           buffer;
6312     cl_map_flags     map_flags;
6313     size_t           offset;
6314     size_t           size;
6315     cl_event         wait_list[MAX_WAIT_LIST];
6316     cl_uint          num_events = MAX_WAIT_LIST;
6317     cl_event         event;
6318     cl_int           err;
6319     void*            ptr;
6320     UNUSED(argc);
6321     UNUSED(ptr);
6322 
6323     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6324 	return enif_make_badarg(env);
6325     if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
6326 	return enif_make_badarg(env);
6327     if (!get_bitfields(env, argv[2], &map_flags, kv_map_flags))
6328 	return enif_make_badarg(env);
6329     if (!ecl_get_sizet(env, argv[3], &offset))
6330 	return enif_make_badarg(env);
6331     if (!ecl_get_sizet(env, argv[4], &size))
6332 	return enif_make_badarg(env);
6333     if (!get_object_list(env, argv[5], &event_r, false,
6334 			 (void**) wait_list, &num_events))
6335 	return enif_make_badarg(env);
6336 
6337     ptr = clEnqueueMapBuffer(o_queue->queue,
6338 			     buffer,
6339 			     CL_FALSE,
6340 			     map_flags,
6341 			     offset,
6342 			     size,
6343 			     num_events,
6344 			     num_events ? wait_list : 0,
6345 			     &event,
6346 			     &err);
6347     if (!err) {
6348 	ERL_NIF_TERM t;
6349 	// FIXME: how should we handle ptr????
6350 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6351 	return enif_make_tuple2(env, ATOM(ok), t);
6352     }
6353     return ecl_make_error(env, err);
6354 }
6355 
6356 //
6357 // enqueue_map_image(_Queue, _Image, _MapFlags, _Origin, _Region, _WaitList) ->
6358 //
ecl_enqueue_map_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6359 static ERL_NIF_TERM ecl_enqueue_map_image(ErlNifEnv* env, int argc,
6360 					  const ERL_NIF_TERM argv[])
6361 {
6362     ecl_object_t*    o_queue;
6363     cl_mem           image;
6364     cl_map_flags     map_flags;
6365     size_t           origin[3];
6366     size_t           region[3];
6367     size_t           row_pitch;
6368     size_t           slice_pitch;
6369     cl_event         wait_list[MAX_WAIT_LIST];
6370     cl_uint          num_events = MAX_WAIT_LIST;
6371     size_t           num_origin = 3;
6372     size_t           num_region = 3;
6373     cl_event         event;
6374     cl_int           err;
6375     void*            ptr;
6376     UNUSED(argc);
6377     UNUSED(ptr);
6378 
6379     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6380 	return enif_make_badarg(env);
6381     if (!get_object(env, argv[1], &mem_r, false, (void**)&image))
6382 	return enif_make_badarg(env);
6383     if (!get_bitfields(env, argv[2], &map_flags, kv_map_flags))
6384 	return enif_make_badarg(env);
6385     origin[0] = origin[1] = origin[2] = 0;
6386     if (!get_sizet_list(env, argv[3], origin, &num_origin))
6387 	return enif_make_badarg(env);
6388     region[0] = region[1] = region[2] = 1;
6389     if (!get_sizet_list(env, argv[4], region, &num_region))
6390 	return enif_make_badarg(env);
6391     if (!get_object_list(env, argv[5], &event_r, false,
6392 			 (void**) wait_list, &num_events))
6393 	return enif_make_badarg(env);
6394 
6395     ptr = clEnqueueMapImage(o_queue->queue,
6396 			    image,
6397 			    CL_FALSE,
6398 			    map_flags,
6399 			    origin,
6400 			    region,
6401 			    &row_pitch,
6402 			    &slice_pitch,
6403 			    num_events,
6404 			    num_events ? wait_list : 0,
6405 			    &event,
6406 			    &err);
6407     if (!err) {
6408 	ERL_NIF_TERM t;
6409 	// FIXME: send binary+event to event thread
6410 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6411 	return enif_make_tuple2(env, ATOM(ok), t);
6412     }
6413     return ecl_make_error(env, err);
6414 }
6415 
6416 //
6417 //  enqueue_unmap_mem_object(_Queue, _Mem, _WaitList) ->
6418 //
6419 //
ecl_enqueue_unmap_mem_object(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6420 static ERL_NIF_TERM ecl_enqueue_unmap_mem_object(ErlNifEnv* env, int argc,
6421 						 const ERL_NIF_TERM argv[])
6422 {
6423     ecl_object_t*    o_queue;
6424     cl_mem           memobj;
6425     cl_event         wait_list[MAX_WAIT_LIST];
6426     cl_uint          num_events = MAX_WAIT_LIST;
6427     cl_event         event;
6428     void* mapped_ptr;
6429     cl_int err;
6430     UNUSED(argc);
6431 
6432     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6433 	return enif_make_badarg(env);
6434     if (!get_object(env, argv[1], &mem_r, false, (void**)&memobj))
6435 	return enif_make_badarg(env);
6436     if (!get_object_list(env, argv[3], &event_r, false,
6437 			 (void**) wait_list, &num_events))
6438 	return enif_make_badarg(env);
6439     mapped_ptr = 0;  // FIXME!!!!
6440 
6441     err = ECL_CALL(clEnqueueUnmapMemObject)(o_queue->queue, memobj,
6442 				  mapped_ptr,
6443 				  num_events,
6444 				  num_events ? wait_list : 0,
6445 				  &event);
6446     if (!err) {
6447 	ERL_NIF_TERM t;
6448 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6449 	return enif_make_tuple2(env, ATOM(ok), t);
6450     }
6451     return ecl_make_error(env, err);
6452 }
6453 
6454 #if CL_VERSION_1_2 == 1
6455 //
ecl_enqueue_migrate_mem_objects(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6456 static ERL_NIF_TERM ecl_enqueue_migrate_mem_objects(ErlNifEnv* env, int argc,
6457 						    const ERL_NIF_TERM argv[])
6458 {
6459     ecl_object_t*    o_queue;
6460     cl_uint          num_mem_objects = MAX_MEM_OBJECTS;
6461     cl_mem           mem_objects[MAX_MEM_OBJECTS];
6462     cl_mem_migration_flags flags = 0;
6463     cl_event         wait_list[MAX_WAIT_LIST];
6464     cl_uint          num_events = MAX_WAIT_LIST;
6465     cl_event         event;
6466     cl_int err;
6467     UNUSED(argc);
6468 
6469     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6470 	return enif_make_badarg(env);
6471     if (!get_object_list(env, argv[1], &mem_r, false,
6472 			 (void**) mem_objects, &num_mem_objects))
6473 	return enif_make_badarg(env);
6474     if (!get_bitfields(env, argv[2], &flags, kv_migration_flags))
6475 	return enif_make_badarg(env);
6476 
6477     if (!get_object_list(env, argv[3], &event_r, false,
6478 			 (void**) wait_list, &num_events))
6479 	return enif_make_badarg(env);
6480 
6481     err = ECL_CALL(clEnqueueMigrateMemObjects)(o_queue->queue,
6482 				     num_mem_objects,
6483 				     num_mem_objects ? mem_objects : NULL,
6484 				     flags,
6485 				     num_events,
6486 				     num_events ? wait_list : 0,
6487 				     &event);
6488     if (!err) {
6489 	ERL_NIF_TERM t;
6490 	t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6491 	return enif_make_tuple2(env, ATOM(ok), t);
6492     }
6493     return ecl_make_error(env, err);
6494 }
6495 #endif
6496 
eclEnqueueBarrier(cl_command_queue queue)6497 cl_int eclEnqueueBarrier(cl_command_queue queue)
6498 {
6499     return ECL_CALL(clEnqueueBarrierWithWaitList)(queue,0,NULL,NULL);
6500 }
6501 
ecl_enqueue_barrier(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6502 static ERL_NIF_TERM ecl_enqueue_barrier(ErlNifEnv* env, int argc,
6503 					const ERL_NIF_TERM argv[])
6504 {
6505     ecl_object_t* o_queue;
6506     cl_int           err;
6507     UNUSED(argc);
6508 
6509     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6510 	return enif_make_badarg(env);
6511     if(o_queue->version >= 12) {
6512 	if (!(err = eclEnqueueBarrier(o_queue->queue))) {
6513 	    return ATOM(ok);
6514 	}
6515     } else {  // deprecated in 1.2, available in 1.1
6516 	if (!(err = ECL_CALL(clEnqueueBarrier)(o_queue->queue))) {
6517 	    return ATOM(ok);
6518 	}
6519     }
6520     return ecl_make_error(env, err);
6521 }
6522 
6523 #if CL_VERSION_1_2 == 1
6524 //
6525 // cl:enqueue_barrier_with_wait_list(Queue::cl_queue(),
6526 //                                   WaitList::[cl_event()]) ->
6527 //    {'ok',cl_event()} | {'error', cl_error()}
6528 //
ecl_enqueue_barrier_with_wait_list(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6529 static ERL_NIF_TERM ecl_enqueue_barrier_with_wait_list(ErlNifEnv* env,
6530 						       int argc,
6531 						       const ERL_NIF_TERM argv[])
6532 {
6533     ecl_object_t*    o_queue;
6534     cl_event         wait_list[MAX_WAIT_LIST];
6535     cl_uint          num_events = MAX_WAIT_LIST;
6536     cl_event         event;
6537     cl_int           err;
6538     cl_bool          want_event = true;  // make this an arg?
6539     UNUSED(argc);
6540 
6541     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6542 	return enif_make_badarg(env);
6543     if (!get_object_list(env, argv[1], &event_r, false,
6544 			 (void**) wait_list, &num_events))
6545 	return enif_make_badarg(env);
6546     err = ECL_CALL(clEnqueueBarrierWithWaitList)(o_queue->queue,num_events,
6547 						 num_events ? wait_list : NULL,
6548 						 want_event ? &event : NULL );
6549     if (!err) {
6550 	if (want_event) {
6551 	    ERL_NIF_TERM t;
6552 	    t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6553 	    return enif_make_tuple2(env, ATOM(ok), t);
6554 	}
6555 	return ATOM(ok);
6556     }
6557     return ecl_make_error(env, err);
6558 }
6559 
6560 //
6561 // cl:enqueue_marker_with_wait_list(Queue::cl_queue(),
6562 //                                   WaitList::[cl_event()]) ->
6563 //    {'ok',cl_event()} | {'error', cl_error()}
6564 //
ecl_enqueue_marker_with_wait_list(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6565 static ERL_NIF_TERM ecl_enqueue_marker_with_wait_list(ErlNifEnv* env,
6566 						      int argc,
6567 						      const ERL_NIF_TERM argv[])
6568 {
6569     ecl_object_t*    o_queue;
6570     cl_event      wait_list[MAX_WAIT_LIST];
6571     cl_uint        num_events = MAX_WAIT_LIST;
6572     cl_int           err;
6573     cl_event         event;
6574     cl_bool          want_event = true;  // make this an arg?
6575     UNUSED(argc);
6576 
6577     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6578 	return enif_make_badarg(env);
6579     if (!get_object_list(env, argv[1], &event_r, false,
6580 			 (void**) wait_list, &num_events))
6581 	return enif_make_badarg(env);
6582     err = ECL_CALL(clEnqueueMarkerWithWaitList)(o_queue->queue,num_events,
6583 						num_events ? wait_list : NULL,
6584 						want_event ? &event : NULL );
6585     if (!err) {
6586 	if (want_event) {
6587 	    ERL_NIF_TERM t;
6588 	    t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6589 	    return enif_make_tuple2(env, ATOM(ok), t);
6590 	}
6591 	return ATOM(ok);
6592     }
6593     return ecl_make_error(env, err);
6594 }
6595 #endif
6596 
6597 
6598 //
6599 // cl:async_flush(Queue::cl_queue()) -> reference()
6600 //
ecl_async_flush(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6601 static ERL_NIF_TERM ecl_async_flush(ErlNifEnv* env, int argc,
6602 				    const ERL_NIF_TERM argv[])
6603 {
6604     ecl_object_t* o_queue;
6605     ecl_context_t* o_context;
6606     ecl_message_t m;
6607     ERL_NIF_TERM ref;
6608     UNUSED(argc);
6609 
6610     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6611 	return enif_make_badarg(env);
6612     if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context
6613 	return enif_make_badarg(env);
6614     if (!(m.env = enif_alloc_env()))
6615 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
6616     ref = enif_make_ref(env);
6617 
6618     m.type   = ECL_MESSAGE_FLUSH;
6619     (void) enif_self(env, &m.sender);
6620     m.ref    = enif_make_copy(m.env, ref);
6621     m.queue  = o_queue;
6622     // keep while operation is running, release after operation in the thread
6623     enif_keep_resource(o_queue);
6624     ecl_message_send(o_context->thr, &m);
6625     return enif_make_tuple2(env, ATOM(ok), ref);
6626 }
6627 
6628 //
6629 // cl:async_finish(Queue::cl_queue()) -> reference()
6630 //
ecl_async_finish(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6631 static ERL_NIF_TERM ecl_async_finish(ErlNifEnv* env, int argc,
6632 				     const ERL_NIF_TERM argv[])
6633 {
6634     ecl_object_t* o_queue;
6635     ecl_context_t* o_context;
6636     ecl_message_t m;
6637     ERL_NIF_TERM ref;
6638     UNUSED(argc);
6639 
6640     if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6641 	return enif_make_badarg(env);
6642     if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context
6643 	return enif_make_badarg(env);
6644     if (!(m.env = enif_alloc_env()))
6645 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
6646     ref = enif_make_ref(env);
6647 
6648     m.type   =  ECL_MESSAGE_FINISH;
6649     (void) enif_self(env, &m.sender);
6650     m.ref    = enif_make_copy(m.env, ref);
6651     m.queue  = o_queue;
6652     // keep while operation is running, release after operation in the thread
6653     enif_keep_resource(o_queue);
6654     ecl_message_send(o_context->thr, &m);
6655     return enif_make_tuple2(env, ATOM(ok), ref);
6656 }
6657 //
6658 // cl:async_wait_for_event(Event) -> {ok,Ref} | {error,Reason}
6659 // async reply {cl_event, Ref, Result}
6660 //
ecl_async_wait_for_event(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6661 static ERL_NIF_TERM ecl_async_wait_for_event(ErlNifEnv* env, int argc,
6662 					     const ERL_NIF_TERM argv[])
6663 {
6664     ecl_event_t* o_event;
6665     ecl_object_t* o_queue;
6666     ecl_context_t* o_context;
6667     ecl_message_t m;
6668     ERL_NIF_TERM ref;
6669     UNUSED(argc);
6670 
6671     if (!get_ecl_object(env, argv[0],&event_r,false,(ecl_object_t**)&o_event))
6672 	return enif_make_badarg(env);
6673     if (!(o_queue = o_event->obj.parent))  // queue not found !
6674 	return enif_make_badarg(env);
6675     if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context
6676 	return enif_make_badarg(env);
6677     if (!(m.env = enif_alloc_env()))
6678 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
6679     ref = enif_make_ref(env);
6680 
6681     m.type   = ECL_MESSAGE_WAIT_FOR_EVENT;
6682     (void) enif_self(env, &m.sender);
6683     m.ref    = enif_make_copy(m.env, ref);
6684     m.event  = o_event;
6685     // keep while operation is running, release after operation in the thread
6686     enif_keep_resource(o_event);
6687     ecl_message_send(o_context->thr, &m);
6688     return enif_make_tuple2(env, ATOM(ok), ref);
6689 }
6690 
6691 // return event info
ecl_get_event_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6692 static ERL_NIF_TERM ecl_get_event_info(ErlNifEnv* env, int argc,
6693 				       const ERL_NIF_TERM argv[])
6694 {
6695     ecl_object_t* o_event;
6696     UNUSED(argc);
6697 
6698     if (!get_ecl_object(env, argv[0], &event_r, false, &o_event))
6699 	return enif_make_badarg(env);
6700     return make_object_info(env, argv[1], o_event,
6701 			    (info_fn_t*) clGetEventInfo,
6702 			    event_info,
6703 			    sizeof_array(event_info));
6704 }
6705 
get_version(char * version)6706 static cl_uint get_version(char *version)
6707 {
6708     cl_uint ver = 0;
6709     version += 7;
6710     if(*version >= 48 && *version <= 57)
6711 	ver += (*version-48)*10;
6712     version++;
6713     if(*version == 46) {
6714 	version++;
6715 	if(*version >= 48 && *version <= 57)
6716 	    ver += (*version-48);
6717     }
6718     /* fprintf(stderr, "V3 %s %d\r\n", version, ver); */
6719     return ver;
6720 }
6721 
6722 // pre-Load Platform Ids and Device Ids, this will make the
6723 // internal IDs kind of static for the application code. The IDs
6724 // can then be used in matching etc.
6725 
ecl_pre_load(ErlNifEnv * env,ecl_env_t * ecl,cl_int * rerr)6726 static int ecl_pre_load(ErlNifEnv* env, ecl_env_t* ecl, cl_int* rerr)
6727 {
6728     cl_platform_id   platform_id[MAX_PLATFORMS];
6729     cl_uint          num_platforms;
6730     cl_uint          i;
6731     cl_int           err;
6732 
6733     if ((err = ECL_CALL(clGetPlatformIDs)
6734 	 (MAX_PLATFORMS, platform_id, &num_platforms))) {
6735 	*rerr = err;
6736 	return -1;
6737     }
6738 
6739     ecl->platform = enif_alloc(num_platforms*sizeof(ecl_platform_t*));
6740     ecl->nplatforms = num_platforms;
6741     ecl->icd_version = 11;
6742 
6743     for (i = 0; i < num_platforms; i++) {
6744 	ecl_object_t* obj;
6745 	cl_device_id     device_id[MAX_DEVICES];
6746 	cl_uint          num_devices;
6747 	cl_uint          j;
6748 	char             version[128];
6749 	cl_int           ver = -1;
6750 
6751 	if(CL_SUCCESS == ECL_CALL(clGetPlatformInfo)
6752 	   (platform_id[i], CL_PLATFORM_VERSION, 64, version, NULL)) {
6753 	    if((ver = get_version(version)) > ecl->icd_version)
6754 		ecl->icd_version = ver;
6755 	}
6756 	obj = ecl_new(env, &platform_r,platform_id[i],0,ver);
6757 	ecl->platform[i].o_platform = obj;
6758 
6759 	if ((err = ECL_CALL(clGetDeviceIDs)
6760 	     (platform_id[i], CL_DEVICE_TYPE_ALL,
6761 	      MAX_DEVICES, device_id, &num_devices))) {
6762 	    *rerr = err;
6763 	    return -1;
6764 	}
6765 	ecl->platform[i].o_device=enif_alloc(num_devices*sizeof(ecl_object_t));
6766 	ecl->platform[i].ndevices = num_devices;
6767 	for (j = 0; j < num_devices; j++) {
6768 	    ver = ecl->icd_version;
6769 	    if(CL_SUCCESS == ECL_CALL(clGetDeviceInfo)
6770 	       (device_id[j], CL_DEVICE_VERSION, 64, version, NULL)) {
6771 		ver = get_version(version);
6772 	    }
6773 	    obj = ecl_new(env, &device_r, device_id[j],0, ver);
6774 	    ecl->platform[i].o_device[j] = obj;
6775 	}
6776     }
6777 
6778     return 0;
6779 }
6780 
ecl_load(ErlNifEnv * env,void ** priv_data,ERL_NIF_TERM load_info)6781 static int  ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
6782 {
6783     ErlNifResourceFlags tried;
6784     ecl_env_t* ecl;
6785     cl_int err;
6786     lhash_func_t func = { ref_hash, ref_cmp, ref_release, 0 };
6787     UNUSED(env);
6788     UNUSED(load_info);
6789 
6790     DBG("ecl_load");
6791 
6792     if (!(ecl = enif_alloc(sizeof(ecl_env_t))))
6793 	return -1;
6794     ecl->ref_count = 1;
6795     ecl->context_list = NULL;
6796 
6797     if (!(ecl->ref_lock = enif_rwlock_create("ref_lock")))
6798 	return -1;
6799     if (!(ecl->context_list_lock = enif_rwlock_create("context_list_lock")))
6800 	return -1;
6801     if (ecl_queue_init(&ecl->q) < 0)
6802 	return -1;
6803     lhash_init(&ecl->ref, "ref", 2, &func);
6804 
6805     DBG("ecl_load: ecl=%p", ecl);
6806     DBG("ecl_load: ecl->context_list_lock=%p", ecl->context_list_lock);
6807 
6808     // load OpenCL functions
6809     if (ecl_load_dynfunctions(ecl) < 0)
6810 	return -1;
6811 
6812     // Load atoms
6813 
6814     // General atoms
6815     LOAD_ATOM(ok);
6816     LOAD_ATOM(error);
6817     LOAD_ATOM(unknown);
6818     LOAD_ATOM(undefined);
6819     LOAD_ATOM(true);
6820     LOAD_ATOM(false);
6821 
6822     // async messages
6823     LOAD_ATOM(cl_async);
6824     LOAD_ATOM(cl_event);
6825 
6826     // Type names
6827     LOAD_ATOM(platform_t);
6828     LOAD_ATOM(device_t);
6829     LOAD_ATOM(context_t);
6830     LOAD_ATOM(command_queue_t);
6831     LOAD_ATOM(mem_t);
6832     LOAD_ATOM(sampler_t);
6833     LOAD_ATOM(program_t);
6834     LOAD_ATOM(kernel_t);
6835     LOAD_ATOM(event_t);
6836 
6837     LOAD_ATOM(char);
6838     LOAD_ATOM(char2);
6839     LOAD_ATOM(char4);
6840     LOAD_ATOM(char8);
6841     LOAD_ATOM(char16);
6842 
6843     LOAD_ATOM(uchar);
6844     LOAD_ATOM(uchar2);
6845     LOAD_ATOM(uchar4);
6846     LOAD_ATOM(uchar8);
6847     LOAD_ATOM(uchar16);
6848 
6849     LOAD_ATOM(short);
6850     LOAD_ATOM(short2);
6851     LOAD_ATOM(short4);
6852     LOAD_ATOM(short8);
6853     LOAD_ATOM(short16);
6854 
6855     LOAD_ATOM(ushort);
6856     LOAD_ATOM(ushort2);
6857     LOAD_ATOM(ushort4);
6858     LOAD_ATOM(ushort8);
6859     LOAD_ATOM(ushort16);
6860 
6861     LOAD_ATOM(int);
6862     LOAD_ATOM(int2);
6863     LOAD_ATOM(int4);
6864     LOAD_ATOM(int8);
6865     LOAD_ATOM(int16);
6866 
6867     LOAD_ATOM(uint);
6868     LOAD_ATOM(uint2);
6869     LOAD_ATOM(uint4);
6870     LOAD_ATOM(uint8);
6871     LOAD_ATOM(uint16);
6872 
6873     LOAD_ATOM(long);
6874     LOAD_ATOM(long2);
6875     LOAD_ATOM(long4);
6876     LOAD_ATOM(long8);
6877     LOAD_ATOM(long16);
6878 
6879     LOAD_ATOM(ulong);
6880     LOAD_ATOM(ulong2);
6881     LOAD_ATOM(ulong4);
6882     LOAD_ATOM(ulong8);
6883     LOAD_ATOM(ulong16);
6884 
6885     LOAD_ATOM(half);
6886 
6887     LOAD_ATOM(float);
6888     LOAD_ATOM(float2);
6889     LOAD_ATOM(float4);
6890     LOAD_ATOM(float8);
6891     LOAD_ATOM(float16);
6892 
6893     LOAD_ATOM(double);
6894     LOAD_ATOM(double2);
6895     LOAD_ATOM(double4);
6896     LOAD_ATOM(double8);
6897     LOAD_ATOM(double16);
6898 
6899     // records
6900     LOAD_ATOM(cl_image_desc);
6901     LOAD_ATOM(cl_image_format);
6902 
6903     // channel type
6904     LOAD_ATOM(snorm_int8);
6905     LOAD_ATOM(snorm_int16);
6906     LOAD_ATOM(unorm_int8);
6907     LOAD_ATOM(unorm_int16);
6908     LOAD_ATOM(unorm_int24);
6909     LOAD_ATOM(unorm_short_565);
6910     LOAD_ATOM(unorm_short_555);
6911     LOAD_ATOM(unorm_int_101010);
6912     LOAD_ATOM(signed_int8);
6913     LOAD_ATOM(signed_int16);
6914     LOAD_ATOM(signed_int32);
6915     LOAD_ATOM(unsigned_int8);
6916     LOAD_ATOM(unsigned_int16);
6917     LOAD_ATOM(unsigned_int32);
6918     LOAD_ATOM(half_float);
6919 
6920     // channel order
6921     LOAD_ATOM(r);
6922     LOAD_ATOM(a);
6923     LOAD_ATOM(rg);
6924     LOAD_ATOM(ra);
6925     LOAD_ATOM(rgb);
6926     LOAD_ATOM(rgba);
6927     LOAD_ATOM(bgra);
6928     LOAD_ATOM(argb);
6929     LOAD_ATOM(intensity);
6930     LOAD_ATOM(luminance);
6931     LOAD_ATOM(rx);
6932     LOAD_ATOM(rgx);
6933     LOAD_ATOM(rgbx);
6934     LOAD_ATOM(depth);
6935     LOAD_ATOM(depth_stencil);
6936 
6937     // partition_property
6938     LOAD_ATOM(equally);
6939     LOAD_ATOM(by_counts);
6940     LOAD_ATOM(by_counts_list_end);
6941     LOAD_ATOM(by_affinity_domain);
6942 
6943     // affinity_domain
6944     LOAD_ATOM(numa);
6945     LOAD_ATOM(l4_cache);
6946     LOAD_ATOM(l3_cache);
6947     LOAD_ATOM(l2_cache);
6948     LOAD_ATOM(l1_cache);
6949     LOAD_ATOM(next_partitionable);
6950 
6951     // Load options & flags
6952 
6953     // Device info
6954     LOAD_ATOM(type);
6955     LOAD_ATOM(vendor_id);
6956     LOAD_ATOM(max_compute_units);
6957     LOAD_ATOM(max_work_item_dimensions);
6958     LOAD_ATOM(max_work_group_size);
6959     LOAD_ATOM(max_work_item_sizes);
6960     LOAD_ATOM(preferred_vector_width_char);
6961     LOAD_ATOM(preferred_vector_width_short);
6962     LOAD_ATOM(preferred_vector_width_int);
6963     LOAD_ATOM(preferred_vector_width_long);
6964     LOAD_ATOM(preferred_vector_width_float);
6965     LOAD_ATOM(preferred_vector_width_double);
6966     LOAD_ATOM(max_clock_frequency);
6967     LOAD_ATOM(address_bits);
6968     LOAD_ATOM(max_read_image_args);
6969     LOAD_ATOM(max_write_image_args);
6970     LOAD_ATOM(max_mem_alloc_size);
6971     LOAD_ATOM(image2d_max_width);
6972     LOAD_ATOM(image2d_max_height);
6973     LOAD_ATOM(image3d_max_width);
6974     LOAD_ATOM(image3d_max_height);
6975     LOAD_ATOM(image3d_max_depth);
6976     LOAD_ATOM(image_support);
6977     LOAD_ATOM(max_parameter_size);
6978     LOAD_ATOM(max_samplers);
6979     LOAD_ATOM(mem_base_addr_align);
6980     LOAD_ATOM(min_data_type_align_size);
6981     LOAD_ATOM(single_fp_config);
6982     LOAD_ATOM(global_mem_cache_type);
6983     LOAD_ATOM(global_mem_cacheline_size);
6984     LOAD_ATOM(global_mem_cache_size);
6985     LOAD_ATOM(global_mem_size);
6986     LOAD_ATOM(max_constant_buffer_size);
6987     LOAD_ATOM(max_constant_args);
6988     LOAD_ATOM(local_mem_type);
6989     LOAD_ATOM(local_mem_size);
6990     LOAD_ATOM(error_correction_support);
6991     LOAD_ATOM(profiling_timer_resolution);
6992     LOAD_ATOM(endian_little);
6993     LOAD_ATOM(available);
6994     LOAD_ATOM(compiler_available);
6995     LOAD_ATOM(execution_capabilities);
6996     LOAD_ATOM(queue_properties);
6997     LOAD_ATOM(name);
6998     LOAD_ATOM(vendor);
6999     LOAD_ATOM(driver_version);
7000     LOAD_ATOM(profile);
7001     LOAD_ATOM(version);
7002     LOAD_ATOM(extensions);
7003     LOAD_ATOM(platform);
7004 
7005     LOAD_ATOM(double_fp_config);
7006     LOAD_ATOM(half_fp_config);
7007     LOAD_ATOM(preferred_vector_width_half);
7008     LOAD_ATOM(host_unified_memory);
7009     LOAD_ATOM(native_vector_width_char);
7010     LOAD_ATOM(native_vector_width_short);
7011     LOAD_ATOM(native_vector_width_int);
7012     LOAD_ATOM(native_vector_width_long);
7013     LOAD_ATOM(native_vector_width_float);
7014     LOAD_ATOM(native_vector_width_double);
7015     LOAD_ATOM(native_vector_width_half);
7016     LOAD_ATOM(opencl_c_version);
7017     LOAD_ATOM(linker_available);
7018     LOAD_ATOM(built_in_kernels);
7019     LOAD_ATOM(image_max_buffer_size);
7020     LOAD_ATOM(image_max_array_size);
7021     LOAD_ATOM(parent_device);
7022     LOAD_ATOM(partition_max_sub_devices);
7023     LOAD_ATOM(partition_properties);
7024     LOAD_ATOM(partition_affinity_domain);
7025     LOAD_ATOM(partition_type);
7026     LOAD_ATOM(reference_count);
7027     LOAD_ATOM(preferred_interop_user_sync);
7028     LOAD_ATOM(printf_buffer_size);
7029     LOAD_ATOM(image_pitch_alignment);
7030     LOAD_ATOM(image_base_address_alignment);
7031     // cl_nv_device_attribute_query extension
7032     LOAD_ATOM(compute_capability_major_nv);
7033     LOAD_ATOM(compute_capability_minor_nv);
7034     LOAD_ATOM(registers_per_block_nv);
7035     LOAD_ATOM(warp_size_nv);
7036     LOAD_ATOM(gpu_overlap_nv);
7037     LOAD_ATOM(kernel_exec_timeout_nv);
7038     LOAD_ATOM(device_integrated_memory_nv);
7039 
7040      // Platform info
7041     LOAD_ATOM(profile);
7042     LOAD_ATOM(version);
7043     LOAD_ATOM(name);
7044     LOAD_ATOM(vendor);
7045     LOAD_ATOM(extensions);
7046 
7047      // Context info
7048     LOAD_ATOM(reference_count);
7049     LOAD_ATOM(devices);
7050     LOAD_ATOM(properties);
7051 
7052     // Queue info
7053     LOAD_ATOM(context);
7054     LOAD_ATOM(num_devices);
7055     LOAD_ATOM(device);
7056     LOAD_ATOM(reference_count);
7057     LOAD_ATOM(properties);
7058 
7059     // Mem info
7060     LOAD_ATOM(object_type);
7061     LOAD_ATOM(flags);
7062     LOAD_ATOM(size);
7063     LOAD_ATOM(host_ptr);
7064     LOAD_ATOM(map_count);
7065     LOAD_ATOM(reference_count);
7066     LOAD_ATOM(context);
7067 
7068     // Image info
7069     LOAD_ATOM(format);
7070     LOAD_ATOM(element_size);
7071     LOAD_ATOM(row_pitch);
7072     LOAD_ATOM(slice_pitch);
7073     LOAD_ATOM(width);
7074     LOAD_ATOM(height);
7075     LOAD_ATOM(depth);
7076 
7077     // Sampler info
7078     LOAD_ATOM(reference_count);
7079     LOAD_ATOM(context);
7080     LOAD_ATOM(normalized_coords);
7081     LOAD_ATOM(addressing_mode);
7082     LOAD_ATOM(filter_mode);
7083 
7084     // Program info
7085     LOAD_ATOM(reference_count);
7086     LOAD_ATOM(context);
7087     LOAD_ATOM(num_decices);
7088     LOAD_ATOM(devices);
7089     LOAD_ATOM(source);
7090     LOAD_ATOM(binary_sizes);
7091     LOAD_ATOM(binaries);
7092 
7093     // Build Info
7094     LOAD_ATOM(status);
7095     LOAD_ATOM(options);
7096     LOAD_ATOM(log);
7097     LOAD_ATOM(binary_type);
7098 
7099     // Kernel Info
7100     LOAD_ATOM(function_name);
7101     LOAD_ATOM(num_args);
7102     LOAD_ATOM(reference_count);
7103     LOAD_ATOM(context);
7104     LOAD_ATOM(program);
7105 
7106     // Event Info
7107     LOAD_ATOM(command_queue);
7108     LOAD_ATOM(command_type);
7109     LOAD_ATOM(reference_count);
7110     LOAD_ATOM(execution_status);
7111 
7112     // Workgroup info
7113     LOAD_ATOM(work_group_size);
7114     LOAD_ATOM(compile_work_group_size);
7115     LOAD_ATOM(local_mem_size);
7116     LOAD_ATOM(preferred_work_group_size_multiple);
7117     LOAD_ATOM(private_mem_size);
7118     LOAD_ATOM(global_work_size);
7119 
7120     // Error codes
7121     LOAD_ATOM(device_not_found);
7122     LOAD_ATOM(device_not_available);
7123     LOAD_ATOM(compiler_not_available);
7124     LOAD_ATOM(mem_object_allocation_failure);
7125     LOAD_ATOM(out_of_resources);
7126     LOAD_ATOM(out_of_host_memory);
7127     LOAD_ATOM(profiling_info_not_available);
7128     LOAD_ATOM(mem_copy_overlap);
7129     LOAD_ATOM(image_format_mismatch);
7130     LOAD_ATOM(image_format_not_supported);
7131     LOAD_ATOM(build_program_failure);
7132     LOAD_ATOM(map_failure);
7133     LOAD_ATOM(invalid_value);
7134     LOAD_ATOM(invalid_device_type);
7135     LOAD_ATOM(invalid_platform);
7136     LOAD_ATOM(invalid_device);
7137     LOAD_ATOM(invalid_context);
7138     LOAD_ATOM(invalid_queue_properties);
7139     LOAD_ATOM(invalid_command_queue);
7140     LOAD_ATOM(invalid_host_ptr);
7141     LOAD_ATOM(invalid_mem_object);
7142     LOAD_ATOM(invalid_image_format_descriptor);
7143     LOAD_ATOM(invalid_image_size);
7144     LOAD_ATOM(invalid_sampler);
7145     LOAD_ATOM(invalid_binary);
7146     LOAD_ATOM(invalid_build_options);
7147     LOAD_ATOM(invalid_program);
7148     LOAD_ATOM(invalid_program_executable);
7149     LOAD_ATOM(invalid_kernel_name);
7150     LOAD_ATOM(invalid_kernel_definition);
7151     LOAD_ATOM(invalid_kernel);
7152     LOAD_ATOM(invalid_arg_index);
7153     LOAD_ATOM(invalid_arg_value);
7154     LOAD_ATOM(invalid_arg_size);
7155     LOAD_ATOM(invalid_kernel_args);
7156     LOAD_ATOM(invalid_work_dimension);
7157     LOAD_ATOM(invalid_work_group_size);
7158     LOAD_ATOM(invalid_work_item_size);
7159     LOAD_ATOM(invalid_global_offset);
7160     LOAD_ATOM(invalid_event_wait_list);
7161     LOAD_ATOM(invalid_event);
7162     LOAD_ATOM(invalid_operation);
7163     LOAD_ATOM(invalid_gl_object);
7164     LOAD_ATOM(invalid_buffer_size);
7165     LOAD_ATOM(invalid_mip_level);
7166     LOAD_ATOM(invalid_global_work_size);
7167 
7168     // cl_device_type
7169     LOAD_ATOM(all);
7170     LOAD_ATOM(default);
7171     LOAD_ATOM(cpu);
7172     LOAD_ATOM(gpu);
7173     LOAD_ATOM(accelerator);
7174     LOAD_ATOM(custom);
7175 
7176     // fp_config
7177     LOAD_ATOM(denorm);
7178     LOAD_ATOM(inf_nan);
7179     LOAD_ATOM(round_to_nearest);
7180     LOAD_ATOM(round_to_zero);
7181     LOAD_ATOM(round_to_inf);
7182     LOAD_ATOM(fma);
7183     LOAD_ATOM(soft_float);
7184     LOAD_ATOM(correctly_rounded_divide_sqrt);
7185 
7186     // mem_cache_type
7187     LOAD_ATOM(none);
7188     LOAD_ATOM(read_only);
7189     LOAD_ATOM(read_write);
7190 
7191     // local_mem_type
7192     LOAD_ATOM(local);
7193     LOAD_ATOM(global);
7194 
7195     // exec capability
7196     LOAD_ATOM(kernel);
7197     LOAD_ATOM(native_kernel);
7198 
7199     // command_queue_properties
7200     LOAD_ATOM(out_of_order_exec_mode_enable);
7201     LOAD_ATOM(profiling_enable);
7202 
7203     // mem_flags
7204     LOAD_ATOM(read_write);
7205     LOAD_ATOM(write_only);
7206     LOAD_ATOM(read_only);
7207     LOAD_ATOM(use_host_ptr);
7208     LOAD_ATOM(alloc_host_ptr);
7209     LOAD_ATOM(copy_host_ptr);
7210 
7211     // migration_flags
7212     LOAD_ATOM(host);
7213     LOAD_ATOM(content_undefined);
7214 
7215     // mem_object_type
7216     LOAD_ATOM(buffer);
7217     LOAD_ATOM(image2d);
7218     LOAD_ATOM(image3d);
7219     LOAD_ATOM(image2d_array);
7220     LOAD_ATOM(image1d);
7221     LOAD_ATOM(image1d_array);
7222     LOAD_ATOM(image1d_buffer);
7223 
7224     // addressing_mode
7225     LOAD_ATOM(none);
7226     LOAD_ATOM(clamp_to_edge);
7227     LOAD_ATOM(clamp);
7228     LOAD_ATOM(repeat);
7229 
7230     // filter_mode
7231     LOAD_ATOM(nearest);
7232     LOAD_ATOM(linear);
7233 
7234     // map_flags
7235     LOAD_ATOM(read);
7236     LOAD_ATOM(write);
7237 
7238     // build_status
7239     LOAD_ATOM(success);
7240     LOAD_ATOM(none);
7241     LOAD_ATOM(error);
7242     LOAD_ATOM(in_progress);
7243 
7244     // program_binary_type
7245     LOAD_ATOM(none);
7246     LOAD_ATOM(compiled_object);
7247     LOAD_ATOM(library);
7248     LOAD_ATOM(executable);
7249 
7250     // command_type
7251     LOAD_ATOM(ndrange_kernel);
7252     LOAD_ATOM(task);
7253     LOAD_ATOM(native_kernel);
7254     LOAD_ATOM(read_buffer);
7255     LOAD_ATOM(write_buffer);
7256     LOAD_ATOM(copy_buffer);
7257     LOAD_ATOM(read_image);
7258     LOAD_ATOM(write_image);
7259     LOAD_ATOM(copy_image);
7260     LOAD_ATOM(copy_image_to_buffer);
7261     LOAD_ATOM(copy_buffer_to_image);
7262     LOAD_ATOM(map_buffer);
7263     LOAD_ATOM(map_image);
7264     LOAD_ATOM(unmap_mem_object);
7265     LOAD_ATOM(marker);
7266     LOAD_ATOM(aquire_gl_objects);
7267     LOAD_ATOM(release_gl_objects);
7268     LOAD_ATOM(migreate_mem_objects);
7269     LOAD_ATOM(fill_buffer);
7270     LOAD_ATOM(fill_image);
7271 
7272     // execution_status
7273     LOAD_ATOM(complete);
7274     LOAD_ATOM(running);
7275     LOAD_ATOM(submitted);
7276     LOAD_ATOM(queued);
7277 
7278     // arguments
7279     LOAD_ATOM(region);
7280 
7281     LOAD_ATOM(global);
7282     LOAD_ATOM(local);
7283     LOAD_ATOM(constant);
7284     LOAD_ATOM(private);
7285 
7286     LOAD_ATOM(read_only);
7287     LOAD_ATOM(write_only);
7288     LOAD_ATOM(read_write);
7289     LOAD_ATOM(none);
7290 
7291     LOAD_ATOM(none);
7292     LOAD_ATOM(const);
7293     LOAD_ATOM(restrict);
7294     LOAD_ATOM(volatile);
7295 
7296     LOAD_ATOM(address_qualifier);
7297     LOAD_ATOM(access_qualifier);
7298     LOAD_ATOM(type_name);
7299     LOAD_ATOM(type_qualifier);
7300     LOAD_ATOM(name);
7301 
7302     // Create resource types
7303     ecl_resource_init(env, &platform_r, "platform_t",
7304 		      sizeof(ecl_object_t),
7305 		      ecl_platform_dtor,
7306 		      ERL_NIF_RT_CREATE, &tried);
7307     ecl_resource_init(env, &device_r, "device_t",
7308 		      sizeof(ecl_object_t),
7309 		      ecl_device_dtor,
7310 		      ERL_NIF_RT_CREATE, &tried);
7311     ecl_resource_init(env, &context_r, "context_t",
7312 		      sizeof(ecl_context_t),     // NOTE! specialized!
7313 		      ecl_context_dtor,
7314 		      ERL_NIF_RT_CREATE, &tried);
7315     ecl_resource_init(env, &command_queue_r, "command_queue_t",
7316 		      sizeof(ecl_object_t),
7317 		      ecl_queue_dtor,
7318 		      ERL_NIF_RT_CREATE, &tried);
7319     ecl_resource_init(env, &mem_r, "mem_t",
7320 		      sizeof(ecl_object_t),
7321 		      ecl_mem_dtor,
7322 		      ERL_NIF_RT_CREATE, &tried);
7323     ecl_resource_init(env, &sampler_r, "sampler_t",
7324 		      sizeof(ecl_object_t),
7325 		      ecl_sampler_dtor,
7326 		      ERL_NIF_RT_CREATE, &tried);
7327     ecl_resource_init(env, &program_r, "program_t",
7328 		      sizeof(ecl_object_t),
7329 		      ecl_program_dtor,
7330 		      ERL_NIF_RT_CREATE, &tried);
7331     ecl_resource_init(env, &kernel_r, "kernel_t",
7332 		      sizeof(ecl_kernel_t),   // NOTE! specialized!
7333 		      ecl_kernel_dtor,
7334 		      ERL_NIF_RT_CREATE, &tried);
7335     ecl_resource_init(env, &event_r, "event_t",
7336 		      sizeof(ecl_event_t),    // NOTE! specialized!
7337 		      ecl_event_dtor,
7338 		      ERL_NIF_RT_CREATE, &tried);
7339     *priv_data = ecl;
7340 
7341     if (ecl_pre_load(env, ecl, &err) < 0) {
7342 	CL_ERROR("ecl_pre_load: error code = %d", err);
7343     }
7344 
7345     return 0;
7346 }
7347 
7348 #ifdef WIN32
7349 #define RTLD_LAZY 0
7350 #define OPENCL_LIB "opencl.dll"
7351 typedef HMODULE DL_LIB_P;
dlsym(HMODULE Lib,const char * func)7352 void * dlsym(HMODULE Lib, const char *func) {
7353     return (void *) GetProcAddress(Lib, func);
7354 }
7355 
dlopen(const CHAR * DLL,int unused)7356 HMODULE dlopen(const CHAR *DLL, int unused) {
7357   UNUSED(unused);
7358   return LoadLibrary(DLL);
7359 }
7360 #else
7361 typedef void * DL_LIB_P;
7362 # ifdef DARWIN
7363 #   define OPENCL_LIB "/System/Library/Frameworks/OpenCL.framework/OpenCL"
7364 # else
7365 #   define OPENCL_LIB "libOpenCL.so"
7366 # endif
7367 #endif
7368 
ecl_load_dynfunctions(ecl_env_t * ecl)7369 static int ecl_load_dynfunctions(ecl_env_t* ecl)
7370 {
7371     DL_LIB_P handle;
7372 //    if(ecl->icd_version < 12)
7373 //	return;
7374     if ((handle = dlopen(OPENCL_LIB, RTLD_LAZY))) {
7375 	int i = 0;
7376 
7377 	while(ecl_function[i].name != NULL) {
7378 	    if (ecl_function[i].func != NULL) {
7379 		fprintf(stderr, "function %s already loaded\r\n",
7380 			ecl_function[i].name);
7381 	    }
7382 	    else {
7383 		ecl_function[i].func = dlsym(handle, ecl_function[i].name);
7384 		if (ecl_function[i].func == NULL) {
7385 #ifdef DEBUG
7386 		    fprintf(stderr, "unabled to load function %s\r\n",
7387 			    ecl_function[i].name);
7388 #endif
7389 		}
7390 		else {
7391 #ifdef DEBUG
7392 		    fprintf(stderr, "load function %s/%d.%d @ %p\r\n",
7393 			    ecl_function[i].name,
7394 			    ecl_function[i].version / 10,
7395 			    ecl_function[i].version % 10,
7396 			    ecl_function[i].func);
7397 #endif
7398 		}
7399 	    }
7400 	    i++;
7401 	}
7402 	if (ecl_function[i_clCreateImage].func == NULL)
7403 	    ecl_function[i_clCreateImage].func = e_clCreateImage;
7404 
7405 	// patch functions not present or deprecated functions when possible
7406 	if (ecl->icd_version >= 12) {
7407 	    ecl_function[i_clCreateImage2D].func = eclCreateImage2D;
7408 	    ecl_function[i_clCreateImage3D].func = eclCreateImage3D;
7409 	}
7410 	return 0;
7411     }
7412     fprintf(stderr, "Failed open OpenCL dynamic library\r\n");
7413     return -1;
7414 }
7415 
ecl_upgrade(ErlNifEnv * env,void ** priv_data,void ** old_priv_data,ERL_NIF_TERM load_info)7416 static int ecl_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data,
7417 			ERL_NIF_TERM load_info)
7418 {
7419     ErlNifResourceFlags tried;
7420     ecl_context_t* ctx;
7421     ecl_env_t* ecl = (ecl_env_t*) *old_priv_data;
7422     int sync_count;
7423     UNUSED(load_info);
7424 
7425     ecl->ref_count++;
7426     DBG("ecl_upgrade: ecl=%p", ecl, ecl->ref_count);
7427 
7428     // upgrade resource types
7429     ecl_resource_init(env, &platform_r, "platform_t",
7430 		      sizeof(ecl_object_t),
7431 		      ecl_platform_dtor,
7432 		      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7433     ecl_resource_init(env, &device_r, "device_t",
7434 		      sizeof(ecl_object_t),
7435 		      ecl_device_dtor,
7436 		      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7437 
7438     ecl_resource_init(env, &command_queue_r, "command_queue_t",
7439 		      sizeof(ecl_object_t),
7440 		      ecl_queue_dtor,
7441 		      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7442     ecl_resource_init(env, &mem_r, "mem_t",
7443 		      sizeof(ecl_object_t),
7444 		      ecl_mem_dtor,
7445 		      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7446     ecl_resource_init(env, &sampler_r, "sampler_t",
7447 		      sizeof(ecl_object_t),
7448 		      ecl_sampler_dtor,
7449 		      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7450     ecl_resource_init(env, &program_r, "program_t",
7451 		      sizeof(ecl_object_t),
7452 		      ecl_program_dtor,
7453 		      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7454     ecl_resource_init(env, &kernel_r, "kernel_t",
7455 		      sizeof(ecl_kernel_t),   // NOTE! specialized!
7456 		      ecl_kernel_dtor,
7457 		      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7458     ecl_resource_init(env, &event_r, "event_t",
7459 		      sizeof(ecl_event_t),    // NOTE! specialized!
7460 		      ecl_event_dtor,
7461 		      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7462 
7463     ecl_resource_init(env, &context_r, "context_t",
7464 		      sizeof(ecl_context_t),     // NOTE! specialized!
7465 		      ecl_context_dtor,
7466 		      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7467 
7468     // Scan through all contexts and initiate upgrade & sync of the threads
7469     DBG("ecl_upgrade: upgrade and sync ecl=%p", ecl);
7470     DBG("ecl_upgrade: upgrade and sync ecl->context_list_lock=%p",
7471 	ecl->context_list_lock);
7472     sync_count = 0;
7473     enif_rwlock_rwlock(ecl->context_list_lock);
7474     for (ctx = ecl->context_list; ctx != NULL; ctx = ctx->next) {
7475 	ecl_message_t m;
7476 	DBG("ecl_upgrade: ctx=%p", ctx);
7477 	m.type   = ECL_MESSAGE_UPGRADE;
7478 	m.upgrade = ecl_context_main;
7479 	DBG("ecl_upgrade: send upgrade func=%p to %p",
7480 	    ecl_context_main, ctx->thr);
7481 	ecl_message_send(ctx->thr, &m);
7482 
7483 	m.type   = ECL_MESSAGE_SYNC;
7484 	DBG("ecl_upgrade: send sync to %p", ctx->thr);
7485 	ecl_message_send(ctx->thr, &m);
7486 	sync_count++;
7487     }
7488     enif_rwlock_rwunlock(ecl->context_list_lock);
7489 
7490     while(sync_count) {
7491 	ecl_message_t m;
7492 	int r;
7493 	if ((r = ecl_queue_get(&ecl->q, &m)) < 0)
7494 	    return -1;
7495 	if (m.type != ECL_MESSAGE_SYNC_ACK)
7496 	    return -1;
7497 	sync_count--;
7498     }
7499 
7500     *priv_data = *old_priv_data;
7501     return 0;
7502 }
7503 
ecl_unload(ErlNifEnv * env,void * priv_data)7504 static void ecl_unload(ErlNifEnv* env, void* priv_data)
7505 {
7506     ecl_env_t* ecl = priv_data;
7507     UNUSED(env);
7508 
7509     ecl->ref_count--;
7510     DBG("ecl_unload: ecl=%p ref_count=%d", ecl, ecl->ref_count);
7511     if (ecl->ref_count == 0) {
7512 	cl_uint i;
7513 	cl_uint j;
7514 
7515 	for (i = 0; i < ecl->nplatforms; i++) {
7516 	    ecl_object_t* obj;
7517 
7518 	    for (j = 0; j < ecl->platform[i].ndevices; j++) {
7519 		obj = ecl->platform[i].o_device[j];
7520 		enif_release_resource(obj);
7521 	    }
7522 	    enif_free(ecl->platform[i].o_device);
7523 
7524 	    obj = ecl->platform[i].o_platform;
7525 	    enif_release_resource(obj);
7526 	}
7527 	enif_free(ecl->platform);
7528 
7529 	enif_rwlock_rwlock(ecl->ref_lock);
7530 	lhash_delete(&ecl->ref);
7531 	enif_rwlock_rwunlock(ecl->ref_lock);
7532 
7533 	enif_rwlock_destroy(ecl->ref_lock);
7534 
7535 	enif_rwlock_rwlock(ecl->context_list_lock);
7536 	DBG("ecl->context_list = %p", ecl->context_list);
7537 	enif_rwlock_rwunlock(ecl->context_list_lock);
7538 
7539 	enif_rwlock_destroy(ecl->context_list_lock);
7540 
7541 	enif_free(ecl);
7542     }
7543 }
7544 
7545 /*
7546 #warning "testing only, REMOVE before release"
7547 #define ERL_NIF_INIT_BODY			\
7548      DBG("erl_nif_init")
7549 */
7550 
7551 ERL_NIF_INIT(cl, ecl_funcs,
7552 	     ecl_load, NULL,
7553 	     ecl_upgrade, ecl_unload)
7554