1 /****** BEGIN COPYRIGHT *******************************************************
2 *
3 * Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>
4 *
5 * This software is licensed as described in the file COPYRIGHT, which
6 * you should have received as part of this distribution. The terms
7 * are also available at http://www.rogvall.se/docs/copyright.txt.
8 *
9 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
10 * copies of the Software, and permit persons to whom the Software is
11 * furnished to do so, under the terms of the COPYRIGHT file.
12 *
13 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
14 * KIND, either express or implied.
15 *
16 ****** END COPYRIGHT ********************************************************/
17 //
18 // NIF interface for OpenCL binding
19 //
20
21 #include <stdio.h>
22
23 #ifndef WIN32
24 #include <stdbool.h>
25 #include <stdlib.h>
26 #include <stdarg.h>
27 #include <stdint.h>
28 #include <string.h>
29 #include <errno.h>
30 #include <dlfcn.h>
31 #else
32 #include <windows.h>
33 #endif
34
35 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS 1
36
37 #ifdef DARWIN
38 #include <OpenCL/opencl.h>
39 #else
40 #include <CL/cl.h>
41 #include <CL/cl_ext.h>
42 #endif
43
44 // Old cl_platform doesn't have the CL_CALLBACK
45 #ifndef CL_CALLBACK
46 #define CL_CALLBACK
47 #endif
48
49 #ifdef WIN32
50 typedef cl_bool bool;
51 #define true 1
52 #define false 0
53 #endif
54
55
56 #ifdef WIN_X64
57 #define ecl_get_sizet(a1,a2,a3) enif_get_uint64(a1,a2,a3)
58 #define ecl_make_sizet(a1,a2) enif_make_uint64(a1,a2)
59 #else
60 #define ecl_get_sizet(a1,a2,a3) enif_get_ulong(a1,a2,(unsigned long*)a3)
61 #define ecl_make_sizet(a1,a2) enif_make_ulong(a1,a2)
62 #endif
63
64 #define UNUSED(a) ((void) a)
65
66 #include "erl_nif.h"
67 #include "cl_hash.h"
68
69 #define sizeof_array(a) (sizeof(a) / sizeof(a[0]))
70
71 // #define DEBUG
72
73 #ifdef DEBUG
74 #include <stdarg.h>
75 static void ecl_emit_error(char* file, int line, ...);
76 #define DBG(...) ecl_emit_error(__FILE__,__LINE__,__VA_ARGS__)
77 #else
78 #define DBG(...)
79 #endif
80
81 #define CL_ERROR(...) ecl_emit_error(__FILE__,__LINE__,__VA_ARGS__)
82
83 // soft limits
84 #define MAX_INFO_SIZE 1024
85 #define MAX_DEVICES 128
86 #define MAX_PLATFORMS 128
87 #define MAX_OPTION_LIST 1024
88 #define MAX_KERNEL_NAME 1024
89 #define MAX_KERNELS 1024
90 #define MAX_SOURCES 128
91 #define MAX_WAIT_LIST 128
92 #define MAX_WORK_SIZE 3
93 #define MAX_IMAGE_FORMATS 128
94 #define MAX_MEM_OBJECTS 128
95
96 // Atom macros
97 #define ATOM(name) atm_##name
98
99 #define DECL_ATOM(name) \
100 ERL_NIF_TERM atm_##name = 0
101
102 // require env in context (ugly)
103 #define LOAD_ATOM(name) \
104 atm_##name = enif_make_atom(env,#name)
105
106 #define LOAD_ATOM_STRING(name,string) \
107 atm_##name = enif_make_atom(env,string)
108
109 // Wrapper to handle reource atom name etc.
110 typedef struct {
111 char* name;
112 ERL_NIF_TERM type; // resource atom name
113 ErlNifResourceType* res; // the resource type
114 size_t size; // "real" object size
115 } ecl_resource_t;
116
117 struct _ecl_object_t;
118
119 typedef struct _ecl_platform_t {
120 struct _ecl_object_t* o_platform;
121 cl_uint ndevices;
122 struct _ecl_object_t** o_device;
123 } ecl_platform_t;
124
125 struct _ecl_env_t;
126
127 typedef struct _ecl_object_t {
128 lhash_bucket_t hbucket; // inheritance: map: cl->ecl
129 struct _ecl_env_t* env;
130 cl_int version;
131 struct _ecl_object_t* parent; // parent resource object
132 union {
133 cl_platform_id platform;
134 cl_device_id device;
135 cl_context context;
136 cl_command_queue queue;
137 cl_mem mem;
138 cl_sampler sampler;
139 cl_program program;
140 cl_kernel kernel;
141 cl_event event;
142 void* opaque;
143 };
144 } ecl_object_t;
145
146 // "inherits" ecl_object_t and add special binary objects (read/write)
147 typedef struct _ecl_event_t {
148 ecl_object_t obj; // FIXED place for inhertiance
149 bool rd; // Read binary operation
150 bool rl; // Do not release if true
151 ErlNifEnv* bin_env; // environment to hold binary term data
152 ErlNifBinary* bin; // read/write data
153 } ecl_event_t;
154
155 #define KERNEL_ARG_OTHER 0
156 #define KERNEL_ARG_MEM 1
157 #define KERNEL_ARG_SAMPLER 2
158
159 // This is a special construct inorder to kee
160 typedef struct {
161 int type; // 0=other, 1=mem, 2=samper
162 union {
163 cl_mem mem;
164 cl_sampler sampler;
165 void* other;
166 void* value;
167 };
168 } ecl_kernel_arg_t;
169
170 // "inherits" ecl_object_t and reference count kernel args
171 typedef struct _ecl_kernel_t {
172 ecl_object_t obj; // FIXED place for inhertiance
173 cl_uint num_args; // number of arguments used by the kernel
174 ecl_kernel_arg_t* arg; // array of current args
175 } ecl_kernel_t;
176
177
178 typedef enum {
179 OCL_CHAR, // cl_char
180 OCL_UCHAR, // cl_uchar
181 OCL_SHORT, // cl_short
182 OCL_USHORT, // cl_ushort
183 OCL_INT, // cl_int
184 OCL_UINT, // cl_uint
185 OCL_LONG, // cl_long
186 OCL_ULONG, // cl_ulong
187 OCL_HALF, // cl_half
188 OCL_FLOAT, // cl_float
189 OCL_DOUBLE, // cl_double
190 OCL_BOOL, // cl_bool
191 OCL_STRING, // cl_char*
192 OCL_BITFIELD, // cl_ulong
193 OCL_ENUM, // cl_int
194 OCL_POINTER, // void*
195 OCL_SIZE, // size_t
196 OCL_PLATFORM, // void*
197 OCL_DEVICE, // void*
198 OCL_CONTEXT, // void*
199 OCL_PROGRAM, // void*
200 OCL_COMMAND_QUEUE, // void*
201 OCL_IMAGE_FORMAT, // cl_image_format
202 #if CL_VERSION_1_2 == 1
203 OCL_DEVICE_PARTITION, // cl_device_partition_property
204 #endif
205 OCL_NUM_TYPES
206 } ocl_type_t;
207
208 #define OCL_DEVICE_TYPE OCL_BITFIELD
209 #define OCL_DEVICE_FP_CONFIG OCL_BITFIELD
210 #define OCL_DEVICE_GLOBAL_MEM_CACHE_TYPE OCL_ENUM
211 #define OCL_PLATFORM_INFO OCL_UINT
212 #define OCL_DEVICE_INFO OCL_UINT
213 #define OCL_DEVICE_EXEC_CAPABILITIES OCL_BITFIELD
214 #define OCL_QUEUE_PROPERTIES OCL_BITFIELD
215 #define OCL_DEVICE_LOCAL_MEM_TYPE OCL_ENUM
216 #define OCL_MEM_OBJECT_TYPE OCL_ENUM
217 #define OCL_MEM_FLAGS OCL_BITFIELD
218 #define OCL_SAMPLER_ADDRESSING_MODE OCL_ENUM
219 #define OCL_SAMPLER_FILTER_MODE OCL_ENUM
220 #define OCL_BUILD_STATUS OCL_ENUM
221 #define OCL_DEVICE_DOUBLE_FP_CONFIG OCL_BITFIELD
222 #define OCL_PROGRAM_BINARY_TYPE OCL_ENUM
223
224 typedef struct {
225 ERL_NIF_TERM* key;
226 ErlNifUInt64 value;
227 } ecl_kv_t;
228
229 typedef struct {
230 ERL_NIF_TERM* info_key; // Atom
231 cl_uint info_id; // Information
232 bool is_array; // return type is a vector of data
233 ocl_type_t info_type; // info data type
234 void* extern_info; // Encode/Decode enum/bitfields
235 size_t def_size; // Def size in bytes (if == 0 query driver)
236 } ecl_info_t;
237
238 typedef enum {
239 ECL_MESSAGE_STOP, // time to die
240 ECL_MESSAGE_UPGRADE, // time to upgrade
241 ECL_MESSAGE_SYNC, // synk
242 ECL_MESSAGE_SYNC_ACK, // synk return message
243 ECL_MESSAGE_FLUSH, // call clFlush
244 ECL_MESSAGE_FINISH, // call clFinish
245 ECL_MESSAGE_WAIT_FOR_EVENT // call clWaitForEvents (only one event!)
246 } ecl_message_type_t;
247
248 struct _ecl_thread_t;
249
250 typedef struct ecl_message_t
251 {
252 ecl_message_type_t type;
253 ErlNifPid sender; // sender pid
254 ErlNifEnv* env; // message environment (ref, bin's etc)
255 ERL_NIF_TERM ref; // ref (in env!)
256 union {
257 ecl_object_t* queue; // ECL_MESSAGE_FLUSH/ECL_MESSAGE_FINISH
258 ecl_event_t* event; // ECL_MESSAGE_WAIT_FOR_EVENT
259 void* (*upgrade)(void*); // ECL_MESSAGE_UPGRADE
260 };
261 } ecl_message_t;
262
263 typedef struct _ecl_qlink_t {
264 struct _ecl_qlink_t* next;
265 ecl_message_t mesg;
266 } ecl_qlink_t;
267
268 #define MAX_QLINK 8 // pre-allocated qlinks
269
270 typedef struct {
271 ErlNifMutex* mtx;
272 ErlNifCond* cv;
273 int len;
274 ecl_qlink_t* front; // pick from front
275 ecl_qlink_t* rear; // insert at rear
276 ecl_qlink_t* free; // free list in ql
277 ecl_qlink_t ql[MAX_QLINK]; // "pre" allocated qlinks
278 } ecl_queue_t;
279
280 typedef struct _ecl_thread_t {
281 ErlNifTid tid; // thread id
282 ecl_queue_t q; // message queue
283 void* arg; // thread init argument
284 } ecl_thread_t;
285
286 // "inherits" ecl_object_t and add keep track of the context thread
287 typedef struct _ecl_context_t {
288 ecl_object_t obj; // FIXED place for inhertiance
289 struct _ecl_context_t* next; // next context in list
290 ecl_thread_t* thr; // The context thread
291 int upgrade_count; // upgrade tick
292 } ecl_context_t;
293
294 typedef struct _ecl_env_t {
295 int ref_count; // ref count the load/upgrade/unload
296 lhash_t ref; // cl -> ecl
297 ErlNifRWLock* ref_lock; // lhash operation lock
298 ecl_queue_t q; // sync queue
299 cl_uint nplatforms;
300 ecl_platform_t* platform;
301 ErlNifRWLock* context_list_lock;
302 ecl_context_t* context_list;
303 cl_int icd_version;
304 } ecl_env_t;
305
306 typedef struct _ecl_func_t {
307 char* name;
308 void* func;
309 int version; // 10,11,12,20...
310 } ecl_func_t;
311
312 //
313 // ECL_FUNC function list is used multiple times by
314 // updating the meaning of ECL_FUNC
315 //
316 #define ECL_FUNC_LIST \
317 ECL_FUNC(clGetPlatformIDs,10), \
318 ECL_FUNC(clGetPlatformInfo,10), \
319 ECL_FUNC(clGetDeviceIDs,10), \
320 ECL_FUNC(clGetDeviceInfo,10), \
321 ECL_FUNC(clCreateSubDevices,12), \
322 ECL_FUNC(clRetainDevice,12), \
323 ECL_FUNC(clReleaseDevice,12), \
324 ECL_FUNC(clCreateContext,10), \
325 ECL_FUNC(clCreateContextFromType,10), \
326 ECL_FUNC(clRetainContext,10), \
327 ECL_FUNC(clReleaseContext,10), \
328 ECL_FUNC(clGetContextInfo,10), \
329 ECL_FUNC(clCreateCommandQueue,10), \
330 ECL_FUNC(clRetainCommandQueue,10), \
331 ECL_FUNC(clReleaseCommandQueue,10), \
332 ECL_FUNC(clGetCommandQueueInfo,10), \
333 ECL_FUNC(clCreateBuffer,10), \
334 ECL_FUNC(clCreateSubBuffer,11), \
335 ECL_FUNC(clCreateImage,12), \
336 ECL_FUNC(clCreatePipe,20), \
337 ECL_FUNC(clRetainMemObject,10), \
338 ECL_FUNC(clReleaseMemObject,10), \
339 ECL_FUNC(clGetSupportedImageFormats,10), \
340 ECL_FUNC(clGetMemObjectInfo,10), \
341 ECL_FUNC(clGetImageInfo,10), \
342 ECL_FUNC(clGetPipeInfo,20), \
343 ECL_FUNC(clSetMemObjectDestructorCallback,11), \
344 ECL_FUNC(clSVMAlloc,20), \
345 ECL_FUNC(clSVMFree,20), \
346 ECL_FUNC(clCreateSampler,10), \
347 ECL_FUNC(clCreateSamplerWithProperties,20), \
348 ECL_FUNC(clRetainSampler,10), \
349 ECL_FUNC(clReleaseSampler,10), \
350 ECL_FUNC(clGetSamplerInfo,10), \
351 ECL_FUNC(clCreateProgramWithSource,10), \
352 ECL_FUNC(clCreateProgramWithBinary,10), \
353 ECL_FUNC(clCreateProgramWithBuiltInKernels,12), \
354 ECL_FUNC(clRetainProgram,10), \
355 ECL_FUNC(clReleaseProgram,10), \
356 ECL_FUNC(clBuildProgram,10), \
357 ECL_FUNC(clCompileProgram,12), \
358 ECL_FUNC(clLinkProgram,12), \
359 ECL_FUNC(clUnloadPlatformCompiler,12), \
360 ECL_FUNC(clGetProgramInfo,10), \
361 ECL_FUNC(clGetProgramBuildInfo,10), \
362 ECL_FUNC(clCreateKernel,10), \
363 ECL_FUNC(clCreateKernelsInProgram,10), \
364 ECL_FUNC(clSetKernelArg,10), \
365 ECL_FUNC(clSetKernelArgSVMPointer,20), \
366 ECL_FUNC(clSetKernelExecInfo,20), \
367 ECL_FUNC(clRetainKernel,10), \
368 ECL_FUNC(clReleaseKernel,10), \
369 ECL_FUNC(clGetKernelInfo,10), \
370 ECL_FUNC(clGetKernelArgInfo,12), \
371 ECL_FUNC(clGetKernelWorkGroupInfo,10), \
372 ECL_FUNC(clWaitForEvents,10), \
373 ECL_FUNC(clGetEventInfo,10), \
374 ECL_FUNC(clCreateUserEvent,11), \
375 ECL_FUNC(clRetainEvent,10), \
376 ECL_FUNC(clReleaseEvent,10), \
377 ECL_FUNC(clSetUserEventStatus,11), \
378 ECL_FUNC(clSetEventCallback,11), \
379 ECL_FUNC(clGetEventProfilingInfo,10), \
380 ECL_FUNC(clFlush,10), \
381 ECL_FUNC(clFinish,10), \
382 ECL_FUNC(clEnqueueReadBuffer,10), \
383 ECL_FUNC(clEnqueueReadBufferRect,11), \
384 ECL_FUNC(clEnqueueWriteBuffer,10), \
385 ECL_FUNC(clEnqueueWriteBufferRect,11), \
386 ECL_FUNC(clEnqueueFillBuffer,12), \
387 ECL_FUNC(clEnqueueCopyBuffer,10), \
388 ECL_FUNC(clEnqueueCopyBufferRect,11), \
389 ECL_FUNC(clEnqueueReadImage,10), \
390 ECL_FUNC(clEnqueueWriteImage,10), \
391 ECL_FUNC(clEnqueueFillImage,12), \
392 ECL_FUNC(clEnqueueCopyImage,10), \
393 ECL_FUNC(clEnqueueCopyImageToBuffer,10), \
394 ECL_FUNC(clEnqueueCopyBufferToImage,10), \
395 ECL_FUNC(clEnqueueMapBuffer,10), \
396 ECL_FUNC(clEnqueueMapImage,10), \
397 ECL_FUNC(clEnqueueUnmapMemObject,10), \
398 ECL_FUNC(clEnqueueMigrateMemObjects,12), \
399 ECL_FUNC(clEnqueueNDRangeKernel,10), \
400 ECL_FUNC(clEnqueueTask,10), \
401 ECL_FUNC(clEnqueueNativeKernel,10), \
402 ECL_FUNC(clEnqueueMarkerWithWaitList,12), \
403 ECL_FUNC(clEnqueueBarrierWithWaitList,12), \
404 ECL_FUNC(clEnqueueSVMFree,20), \
405 ECL_FUNC(clEnqueueSVMMemcpy,20), \
406 ECL_FUNC(clEnqueueSVMMemFill,20), \
407 ECL_FUNC(clEnqueueSVMMap,20), \
408 ECL_FUNC(clEnqueueSVMUnmap,20), \
409 ECL_FUNC(clGetExtensionFunctionAddressForPlatform,12), \
410 ECL_FUNC(clCreateImage2D,10), \
411 ECL_FUNC(clCreateImage3D,10), \
412 ECL_FUNC(clEnqueueMarker,10), \
413 ECL_FUNC(clEnqueueWaitForEvents,10), \
414 ECL_FUNC(clEnqueueBarrier,10), \
415 ECL_FUNC(clUnloadCompiler,10), \
416 ECL_FUNC(clGetExtensionFunctionAddress,10)
417
418 #include "ecl_types.h"
419
420 #undef ECL_FUNC
421 #define ECL_FUNC(nm,vsn) i_##nm
422 typedef enum {
423 ECL_FUNC_LIST
424 } ecl_func_index_t;
425
426 #undef ECL_FUNC
427 #define ECL_FUNC(nm,vsn) \
428 [i_##nm] = { .name = #nm, .func = NULL, .version = (vsn) }
429
430 ecl_func_t ecl_function[] = {
431 ECL_FUNC_LIST,
432 { NULL, NULL, 0 }
433 };
434
435 #define ECL_FUNC_PTR(nm) ecl_function[i_##nm].func
436 #define ECL_FUNC_VERSION(nm) ecl_function[i_##nm].version
437 #define ECL_CALL(nm) ((t_##nm)(ecl_function[i_##nm].func))
438
439 static void* ecl_context_main(void* arg);
440
441 static int ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info);
442
443 static int ecl_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data,
444 ERL_NIF_TERM load_info);
445
446 static void ecl_unload(ErlNifEnv* env, void* priv_data);
447
448 static int ecl_load_dynfunctions(ecl_env_t* ecl);
449
450 static ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc,
451 const ERL_NIF_TERM argv[]);
452
453 static ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc,
454 const ERL_NIF_TERM argv[]);
455
456 static ERL_NIF_TERM ecl_get_platform_ids(ErlNifEnv* env, int argc,
457 const ERL_NIF_TERM argv[]);
458
459 static ERL_NIF_TERM ecl_get_platform_info(ErlNifEnv* env, int argc,
460 const ERL_NIF_TERM argv[]);
461
462 static ERL_NIF_TERM ecl_get_device_ids(ErlNifEnv* env, int argc,
463 const ERL_NIF_TERM argv[]);
464
465 #if CL_VERSION_1_2 == 1
466 static ERL_NIF_TERM ecl_create_sub_devices(ErlNifEnv* env, int argc,
467 const ERL_NIF_TERM argv[]);
468 #endif
469
470 static ERL_NIF_TERM ecl_get_device_info(ErlNifEnv* env, int argc,
471 const ERL_NIF_TERM argv[]);
472
473 static ERL_NIF_TERM ecl_create_context(ErlNifEnv* env, int argc,
474 const ERL_NIF_TERM argv[]);
475
476 static ERL_NIF_TERM ecl_get_context_info(ErlNifEnv* env, int argc,
477 const ERL_NIF_TERM argv[]);
478
479 static ERL_NIF_TERM ecl_create_queue(ErlNifEnv* env, int argc,
480 const ERL_NIF_TERM argv[]);
481
482 static ERL_NIF_TERM ecl_get_queue_info(ErlNifEnv* env, int argc,
483 const ERL_NIF_TERM argv[]);
484
485 static ERL_NIF_TERM ecl_create_buffer(ErlNifEnv* env, int argc,
486 const ERL_NIF_TERM argv[]);
487
488 #if CL_VERSION_1_1 == 1
489 static ERL_NIF_TERM ecl_create_sub_buffer(ErlNifEnv* env, int argc,
490 const ERL_NIF_TERM argv[]);
491 #endif
492
493 static ERL_NIF_TERM ecl_create_image2d(ErlNifEnv* env, int argc,
494 const ERL_NIF_TERM argv[]);
495
496 static ERL_NIF_TERM ecl_create_image3d(ErlNifEnv* env, int argc,
497 const ERL_NIF_TERM argv[]);
498
499 #if CL_VERSION_1_2 == 1
500 static ERL_NIF_TERM ecl_create_image(ErlNifEnv* env, int argc,
501 const ERL_NIF_TERM argv[]);
502 #endif
503
504 static ERL_NIF_TERM ecl_get_supported_image_formats(ErlNifEnv* env, int argc,
505 const ERL_NIF_TERM argv[]);
506
507 static ERL_NIF_TERM ecl_get_mem_object_info(ErlNifEnv* env, int argc,
508 const ERL_NIF_TERM argv[]);
509
510 static ERL_NIF_TERM ecl_get_image_info(ErlNifEnv* env, int argc,
511 const ERL_NIF_TERM argv[]);
512
513 static ERL_NIF_TERM ecl_create_sampler(ErlNifEnv* env, int argc,
514 const ERL_NIF_TERM argv[]);
515
516 static ERL_NIF_TERM ecl_get_sampler_info(ErlNifEnv* env, int argc,
517 const ERL_NIF_TERM argv[]);
518
519 static ERL_NIF_TERM ecl_create_program_with_source(ErlNifEnv* env, int argc,
520 const ERL_NIF_TERM argv[]);
521 static ERL_NIF_TERM ecl_create_program_with_binary(ErlNifEnv* env, int argc,
522 const ERL_NIF_TERM argv[]);
523 #if CL_VERSION_1_2 == 1
524 static ERL_NIF_TERM ecl_create_program_with_builtin_kernels(
525 ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
526 #endif
527 static ERL_NIF_TERM ecl_async_build_program(ErlNifEnv* env, int argc,
528 const ERL_NIF_TERM argv[]);
529
530 #if CL_VERSION_1_2 == 1
531 static ERL_NIF_TERM ecl_unload_platform_compiler(ErlNifEnv* env, int argc,
532 const ERL_NIF_TERM argv[]);
533 static ERL_NIF_TERM ecl_async_compile_program(ErlNifEnv* env, int argc,
534 const ERL_NIF_TERM argv[]);
535 static ERL_NIF_TERM ecl_async_link_program(ErlNifEnv* env, int argc,
536 const ERL_NIF_TERM argv[]);
537 #endif
538
539 static ERL_NIF_TERM ecl_unload_compiler(ErlNifEnv* env, int argc,
540 const ERL_NIF_TERM argv[]);
541 static ERL_NIF_TERM ecl_get_program_info(ErlNifEnv* env, int argc,
542 const ERL_NIF_TERM argv[]);
543 static ERL_NIF_TERM ecl_get_program_build_info(ErlNifEnv* env, int argc,
544 const ERL_NIF_TERM argv[]);
545
546 static ERL_NIF_TERM ecl_create_kernel(ErlNifEnv* env, int argc,
547 const ERL_NIF_TERM argv[]);
548 static ERL_NIF_TERM ecl_create_kernels_in_program(ErlNifEnv* env, int argc,
549 const ERL_NIF_TERM argv[]);
550 static ERL_NIF_TERM ecl_set_kernel_arg(ErlNifEnv* env, int argc,
551 const ERL_NIF_TERM argv[]);
552 static ERL_NIF_TERM ecl_set_kernel_arg_size(ErlNifEnv* env, int argc,
553 const ERL_NIF_TERM argv[]);
554 static ERL_NIF_TERM ecl_get_kernel_info(ErlNifEnv* env, int argc,
555 const ERL_NIF_TERM argv[]);
556 static ERL_NIF_TERM ecl_get_kernel_workgroup_info(ErlNifEnv* env, int argc,
557 const ERL_NIF_TERM argv[]);
558 #if CL_VERSION_1_2 == 1
559 static ERL_NIF_TERM ecl_get_kernel_arg_info(ErlNifEnv* env, int argc,
560 const ERL_NIF_TERM argv[]);
561 #endif
562
563 static ERL_NIF_TERM ecl_enqueue_task(ErlNifEnv* env, int argc,
564 const ERL_NIF_TERM argv[]);
565 static ERL_NIF_TERM ecl_enqueue_nd_range_kernel(ErlNifEnv* env, int argc,
566 const ERL_NIF_TERM argv[]);
567 static ERL_NIF_TERM ecl_enqueue_marker(ErlNifEnv* env, int argc,
568 const ERL_NIF_TERM argv[]);
569
570 static ERL_NIF_TERM ecl_enqueue_barrier(ErlNifEnv* env, int argc,
571 const ERL_NIF_TERM argv[]);
572
573 #if CL_VERSION_1_2 == 1
574
575 static ERL_NIF_TERM ecl_enqueue_marker_with_wait_list(ErlNifEnv* env,
576 int argc,
577 const ERL_NIF_TERM argv[]);
578 static ERL_NIF_TERM ecl_enqueue_barrier_with_wait_list(ErlNifEnv* env,
579 int argc,
580 const ERL_NIF_TERM argv[]);
581 #endif
582
583 static ERL_NIF_TERM ecl_enqueue_wait_for_events(ErlNifEnv* env, int argc,
584 const ERL_NIF_TERM argv[]);
585
586 static ERL_NIF_TERM ecl_enqueue_read_buffer(ErlNifEnv* env, int argc,
587 const ERL_NIF_TERM argv[]);
588 #if CL_VERSION_1_1 == 1
589 static ERL_NIF_TERM ecl_enqueue_read_buffer_rect(ErlNifEnv* env, int argc,
590 const ERL_NIF_TERM argv[]);
591 #endif
592
593 static ERL_NIF_TERM ecl_enqueue_write_buffer(ErlNifEnv* env, int argc,
594 const ERL_NIF_TERM argv[]);
595
596 #if CL_VERSION_1_1 == 1
597 static ERL_NIF_TERM ecl_enqueue_write_buffer_rect(ErlNifEnv* env, int argc,
598 const ERL_NIF_TERM argv[]);
599 #endif
600
601 #if CL_VERSION_1_2 == 1
602 static ERL_NIF_TERM ecl_enqueue_fill_buffer(ErlNifEnv* env, int argc,
603 const ERL_NIF_TERM argv[]);
604 #endif
605
606 static ERL_NIF_TERM ecl_enqueue_read_image(ErlNifEnv* env, int argc,
607 const ERL_NIF_TERM argv[]);
608
609 static ERL_NIF_TERM ecl_enqueue_write_image(ErlNifEnv* env, int argc,
610 const ERL_NIF_TERM argv[]);
611
612 static ERL_NIF_TERM ecl_enqueue_copy_buffer(ErlNifEnv* env, int argc,
613 const ERL_NIF_TERM argv[]);
614
615 #if CL_VERSION_1_1 == 1
616 static ERL_NIF_TERM ecl_enqueue_copy_buffer_rect(ErlNifEnv* env, int argc,
617 const ERL_NIF_TERM argv[]);
618 #endif
619
620 static ERL_NIF_TERM ecl_enqueue_copy_image(ErlNifEnv* env, int argc,
621 const ERL_NIF_TERM argv[]);
622 #if CL_VERSION_1_2 == 1
623 static ERL_NIF_TERM ecl_enqueue_fill_image(ErlNifEnv* env, int argc,
624 const ERL_NIF_TERM argv[]);
625 #endif
626
627
628 static ERL_NIF_TERM ecl_enqueue_copy_image_to_buffer(ErlNifEnv* env, int argc,
629 const ERL_NIF_TERM argv[]);
630
631 static ERL_NIF_TERM ecl_enqueue_copy_buffer_to_image(ErlNifEnv* env, int argc,
632 const ERL_NIF_TERM argv[]);
633
634 static ERL_NIF_TERM ecl_enqueue_map_buffer(ErlNifEnv* env, int argc,
635 const ERL_NIF_TERM argv[]);
636
637 static ERL_NIF_TERM ecl_enqueue_map_image(ErlNifEnv* env, int argc,
638 const ERL_NIF_TERM argv[]);
639
640 static ERL_NIF_TERM ecl_enqueue_unmap_mem_object(ErlNifEnv* env, int argc,
641 const ERL_NIF_TERM argv[]);
642
643 #if CL_VERSION_1_2 == 1
644 static ERL_NIF_TERM ecl_enqueue_migrate_mem_objects(ErlNifEnv* env, int argc,
645 const ERL_NIF_TERM argv[]);
646 #endif
647
648 static ERL_NIF_TERM ecl_async_flush(ErlNifEnv* env, int argc,
649 const ERL_NIF_TERM argv[]);
650
651 static ERL_NIF_TERM ecl_async_finish(ErlNifEnv* env, int argc,
652 const ERL_NIF_TERM argv[]);
653
654 // speical version of clWaitForEvents
655 static ERL_NIF_TERM ecl_async_wait_for_event(ErlNifEnv* env, int argc,
656 const ERL_NIF_TERM argv[]);
657
658 static ERL_NIF_TERM ecl_get_event_info(ErlNifEnv* env, int argc,
659 const ERL_NIF_TERM argv[]);
660
661
662 ErlNifFunc ecl_funcs[] =
663 {
664 { "noop", 0, ecl_noop },
665 { "versions", 0, ecl_versions },
666
667 // Platform
668 { "get_platform_ids", 0, ecl_get_platform_ids },
669 { "get_platform_info", 2, ecl_get_platform_info },
670
671 // Devices
672 { "get_device_ids", 2, ecl_get_device_ids },
673 #if CL_VERSION_1_2 == 1
674 { "create_sub_devices", 2, ecl_create_sub_devices },
675 #endif
676 { "get_device_info", 2, ecl_get_device_info },
677
678 // Context
679 { "create_context", 1, ecl_create_context },
680 { "get_context_info", 2, ecl_get_context_info },
681
682 // Command queue
683 { "create_queue", 3, ecl_create_queue },
684 { "get_queue_info", 2, ecl_get_queue_info },
685
686 // Memory object
687 { "create_buffer", 4, ecl_create_buffer },
688 #if CL_VERSION_1_1 == 1
689 { "create_sub_buffer", 4, ecl_create_sub_buffer },
690 #endif
691
692 { "get_mem_object_info", 2, ecl_get_mem_object_info },
693 { "get_image_info", 2, ecl_get_image_info },
694
695 { "create_image2d", 7, ecl_create_image2d },
696 { "create_image3d", 9, ecl_create_image3d },
697 #if CL_VERSION_1_2 == 1
698 { "create_image", 5, ecl_create_image },
699 #endif
700 { "get_supported_image_formats",3, ecl_get_supported_image_formats },
701
702 // Sampler
703 { "create_sampler", 4, ecl_create_sampler },
704 { "get_sampler_info", 2, ecl_get_sampler_info },
705
706 // Program
707 { "create_program_with_source", 2, ecl_create_program_with_source },
708 { "create_program_with_binary", 3, ecl_create_program_with_binary },
709 #if CL_VERSION_1_2 == 1
710 { "create_program_with_builtin_kernels", 3,
711 ecl_create_program_with_builtin_kernels },
712 #endif
713 { "async_build_program", 3, ecl_async_build_program },
714 #if CL_VERSION_1_2 == 1
715 { "unload_platform_compiler", 1, ecl_unload_platform_compiler },
716 #endif
717 #if CL_VERSION_1_2 == 1
718 { "async_compile_program", 5, ecl_async_compile_program },
719 #endif
720 #if CL_VERSION_1_2 == 1
721 { "async_link_program", 4, ecl_async_link_program },
722 #endif
723 { "unload_compiler", 0, ecl_unload_compiler },
724 { "get_program_info", 2, ecl_get_program_info },
725 { "get_program_build_info", 3, ecl_get_program_build_info },
726
727 // Kernel
728 { "create_kernel", 2, ecl_create_kernel },
729 { "create_kernels_in_program", 1, ecl_create_kernels_in_program },
730 { "set_kernel_arg", 3, ecl_set_kernel_arg },
731 { "set_kernel_arg_size", 3, ecl_set_kernel_arg_size },
732 { "get_kernel_info", 2, ecl_get_kernel_info },
733 { "get_kernel_workgroup_info", 3, ecl_get_kernel_workgroup_info },
734 #if CL_VERSION_1_2 == 1
735 { "get_kernel_arg_info", 3, ecl_get_kernel_arg_info },
736 #endif
737 // Events
738 { "enqueue_task", 4, ecl_enqueue_task },
739 { "enqueue_nd_range_kernel", 6, ecl_enqueue_nd_range_kernel },
740 { "enqueue_marker", 1, ecl_enqueue_marker },
741 { "enqueue_barrier", 1, ecl_enqueue_barrier },
742 #if CL_VERSION_1_2 == 1
743 { "enqueue_barrier_with_wait_list", 2, ecl_enqueue_barrier_with_wait_list },
744 { "enqueue_marker_with_wait_list", 2, ecl_enqueue_marker_with_wait_list },
745 #endif
746 { "enqueue_wait_for_events", 2, ecl_enqueue_wait_for_events },
747 { "enqueue_read_buffer", 5, ecl_enqueue_read_buffer },
748 #if CL_VERSION_1_1 == 1
749 { "enqueue_read_buffer_rect", 10, ecl_enqueue_read_buffer_rect },
750 #endif
751 { "enqueue_write_buffer", 7, ecl_enqueue_write_buffer },
752 #if CL_VERSION_1_1 == 1
753 { "enqueue_write_buffer_rect", 11, ecl_enqueue_write_buffer_rect },
754 #endif
755 #if CL_VERSION_1_2 == 1
756 { "enqueue_fill_buffer", 6, ecl_enqueue_fill_buffer },
757 #endif
758 { "enqueue_read_image", 7, ecl_enqueue_read_image },
759 { "enqueue_write_image", 9, ecl_enqueue_write_image },
760 { "enqueue_copy_buffer", 7, ecl_enqueue_copy_buffer },
761 #if CL_VERSION_1_1 == 1
762 { "enqueue_copy_buffer_rect", 11, ecl_enqueue_copy_buffer_rect },
763 #endif
764 { "enqueue_copy_image", 6, ecl_enqueue_copy_image },
765 #if CL_VERSION_1_2 == 1
766 { "enqueue_fill_image", 6, ecl_enqueue_fill_image },
767 #endif
768 { "enqueue_copy_image_to_buffer", 7, ecl_enqueue_copy_image_to_buffer },
769 { "enqueue_copy_buffer_to_image", 7, ecl_enqueue_copy_buffer_to_image },
770 { "enqueue_map_buffer", 6, ecl_enqueue_map_buffer },
771 { "enqueue_map_image", 6, ecl_enqueue_map_image },
772 { "enqueue_unmap_mem_object", 3, ecl_enqueue_unmap_mem_object },
773 #if CL_VERSION_1_2 == 1
774 { "enqueue_migrate_mem_objects", 4, ecl_enqueue_migrate_mem_objects },
775 #endif
776 { "async_flush", 1, ecl_async_flush },
777 { "async_finish", 1, ecl_async_finish },
778 { "async_wait_for_event", 1, ecl_async_wait_for_event },
779 { "get_event_info", 2, ecl_get_event_info }
780 };
781
782 static ecl_resource_t platform_r;
783 static ecl_resource_t device_r;
784 static ecl_resource_t context_r;
785 static ecl_resource_t command_queue_r;
786 static ecl_resource_t mem_r;
787 static ecl_resource_t sampler_r;
788 static ecl_resource_t program_r;
789 static ecl_resource_t kernel_r;
790 static ecl_resource_t event_r;
791
792 // General atoms
793 DECL_ATOM(ok);
794 DECL_ATOM(error);
795 DECL_ATOM(unknown);
796 DECL_ATOM(undefined);
797 DECL_ATOM(true);
798 DECL_ATOM(false);
799
800 // async messages
801 DECL_ATOM(cl_async);
802 DECL_ATOM(cl_event);
803
804 // Type names
805 DECL_ATOM(platform_t);
806 DECL_ATOM(device_t);
807 DECL_ATOM(context_t);
808 DECL_ATOM(command_queue_t);
809 DECL_ATOM(mem_t);
810 DECL_ATOM(sampler_t);
811 DECL_ATOM(program_t);
812 DECL_ATOM(kernel_t);
813 DECL_ATOM(event_t);
814
815 // 'cl' type names
816 DECL_ATOM(char);
817 DECL_ATOM(char2);
818 DECL_ATOM(char4);
819 DECL_ATOM(char8);
820 DECL_ATOM(char16);
821
822 DECL_ATOM(uchar);
823 DECL_ATOM(uchar2);
824 DECL_ATOM(uchar4);
825 DECL_ATOM(uchar8);
826 DECL_ATOM(uchar16);
827
828 DECL_ATOM(short);
829 DECL_ATOM(short2);
830 DECL_ATOM(short4);
831 DECL_ATOM(short8);
832 DECL_ATOM(short16);
833
834 DECL_ATOM(ushort);
835 DECL_ATOM(ushort2);
836 DECL_ATOM(ushort4);
837 DECL_ATOM(ushort8);
838 DECL_ATOM(ushort16);
839
840 DECL_ATOM(int);
841 DECL_ATOM(int2);
842 DECL_ATOM(int4);
843 DECL_ATOM(int8);
844 DECL_ATOM(int16);
845
846 DECL_ATOM(uint);
847 DECL_ATOM(uint2);
848 DECL_ATOM(uint4);
849 DECL_ATOM(uint8);
850 DECL_ATOM(uint16);
851
852 DECL_ATOM(long);
853 DECL_ATOM(long2);
854 DECL_ATOM(long4);
855 DECL_ATOM(long8);
856 DECL_ATOM(long16);
857
858 DECL_ATOM(ulong);
859 DECL_ATOM(ulong2);
860 DECL_ATOM(ulong4);
861 DECL_ATOM(ulong8);
862 DECL_ATOM(ulong16);
863
864 DECL_ATOM(half);
865
866 DECL_ATOM(float);
867 DECL_ATOM(float2);
868 DECL_ATOM(float4);
869 DECL_ATOM(float8);
870 DECL_ATOM(float16);
871
872 DECL_ATOM(double);
873 DECL_ATOM(double2);
874 DECL_ATOM(double4);
875 DECL_ATOM(double8);
876 DECL_ATOM(double16);
877
878 // records for image creation
879 DECL_ATOM(cl_image_desc);
880 DECL_ATOM(cl_image_format);
881
882 // Platform info
883 // DECL_ATOM(profile);
884 // DECL_ATOM(version);
885 // DECL_ATOM(name);
886 // DECL_ATOM(vendor);
887 // DECL_ATOM(extensions);
888
889 // Context info
890 DECL_ATOM(reference_count);
891 DECL_ATOM(devices);
892 DECL_ATOM(properties);
893
894 // Queue info
895 DECL_ATOM(context);
896 DECL_ATOM(num_devices);
897 DECL_ATOM(device);
898 // DECL_ATOM(reference_count);
899 // DECL_ATOM(properties);
900
901 // Mem info
902 DECL_ATOM(object_type);
903 DECL_ATOM(flags);
904 DECL_ATOM(size);
905 DECL_ATOM(host_ptr);
906 DECL_ATOM(map_count);
907 // DECL_ATOM(reference_count);
908 // DECL_ATOM(context);
909
910 // Image info
911 DECL_ATOM(format);
912 DECL_ATOM(element_size);
913 DECL_ATOM(row_pitch);
914 DECL_ATOM(slice_pitch);
915 DECL_ATOM(width);
916 DECL_ATOM(height);
917 DECL_ATOM(depth);
918
919 // Sampler info
920 // DECL_ATOM(reference_count);
921 // DECL_ATOM(context);
922 DECL_ATOM(normalized_coords);
923 DECL_ATOM(addressing_mode);
924 DECL_ATOM(filter_mode);
925
926 // Program info
927 // DECL_ATOM(reference_count);
928 // DECL_ATOM(context);
929 DECL_ATOM(num_decices);
930 // DECL_ATOM(devices);
931 DECL_ATOM(source);
932 DECL_ATOM(binary_sizes);
933 DECL_ATOM(binaries);
934
935 // Build Info
936 DECL_ATOM(status);
937 DECL_ATOM(options);
938 DECL_ATOM(log);
939 DECL_ATOM(binary_type);
940
941 // Kernel Info
942 DECL_ATOM(function_name);
943 DECL_ATOM(num_args);
944 // DECL_ATOM(reference_count);
945 // DECL_ATOM(context);
946 DECL_ATOM(program);
947
948 // Event Info
949 DECL_ATOM(command_queue);
950 DECL_ATOM(command_type);
951 // DECL_ATOM(reference_count);
952 DECL_ATOM(execution_status);
953
954 // Workgroup info
955 DECL_ATOM(work_group_size);
956 DECL_ATOM(compile_work_group_size);
957 // DECL_ATOM(local_mem_size);
958 DECL_ATOM(preferred_work_group_size_multiple);
959 DECL_ATOM(private_mem_size);
960 DECL_ATOM(global_work_size);
961
962 // Error codes
963 DECL_ATOM(device_not_found);
964 DECL_ATOM(device_not_available);
965 DECL_ATOM(compiler_not_available);
966 DECL_ATOM(mem_object_allocation_failure);
967 DECL_ATOM(out_of_resources);
968 DECL_ATOM(out_of_host_memory);
969 DECL_ATOM(profiling_info_not_available);
970 DECL_ATOM(mem_copy_overlap);
971 DECL_ATOM(image_format_mismatch);
972 DECL_ATOM(image_format_not_supported);
973 DECL_ATOM(build_program_failure);
974 DECL_ATOM(map_failure);
975 DECL_ATOM(invalid_value);
976 DECL_ATOM(invalid_device_type);
977 DECL_ATOM(invalid_platform);
978 DECL_ATOM(invalid_device);
979 DECL_ATOM(invalid_context);
980 DECL_ATOM(invalid_queue_properties);
981 DECL_ATOM(invalid_command_queue);
982 DECL_ATOM(invalid_host_ptr);
983 DECL_ATOM(invalid_mem_object);
984 DECL_ATOM(invalid_image_format_descriptor);
985 DECL_ATOM(invalid_image_size);
986 DECL_ATOM(invalid_sampler);
987 DECL_ATOM(invalid_binary);
988 DECL_ATOM(invalid_build_options);
989 DECL_ATOM(invalid_program);
990 DECL_ATOM(invalid_program_executable);
991 DECL_ATOM(invalid_kernel_name);
992 DECL_ATOM(invalid_kernel_definition);
993 DECL_ATOM(invalid_kernel);
994 DECL_ATOM(invalid_arg_index);
995 DECL_ATOM(invalid_arg_value);
996 DECL_ATOM(invalid_arg_size);
997 DECL_ATOM(invalid_kernel_args);
998 DECL_ATOM(invalid_work_dimension);
999 DECL_ATOM(invalid_work_group_size);
1000 DECL_ATOM(invalid_work_item_size);
1001 DECL_ATOM(invalid_global_offset);
1002 DECL_ATOM(invalid_event_wait_list);
1003 DECL_ATOM(invalid_event);
1004 DECL_ATOM(invalid_operation);
1005 DECL_ATOM(invalid_gl_object);
1006 DECL_ATOM(invalid_buffer_size);
1007 DECL_ATOM(invalid_mip_level);
1008 DECL_ATOM(invalid_global_work_size);
1009
1010 // cl_device_type
1011 DECL_ATOM(all);
1012 DECL_ATOM(default);
1013 DECL_ATOM(cpu);
1014 DECL_ATOM(gpu);
1015 DECL_ATOM(accelerator);
1016 DECL_ATOM(custom);
1017
1018 // fp_config
1019 DECL_ATOM(denorm);
1020 DECL_ATOM(inf_nan);
1021 DECL_ATOM(round_to_nearest);
1022 DECL_ATOM(round_to_zero);
1023 DECL_ATOM(round_to_inf);
1024 DECL_ATOM(fma);
1025 DECL_ATOM(soft_float);
1026 DECL_ATOM(correctly_rounded_divide_sqrt);
1027
1028 // mem_cache_type
1029 DECL_ATOM(none);
1030 DECL_ATOM(read_only);
1031 DECL_ATOM(read_write);
1032
1033 // local_mem_type
1034 DECL_ATOM(local);
1035 DECL_ATOM(global);
1036
1037 // exec capability
1038 DECL_ATOM(kernel);
1039 DECL_ATOM(native_kernel);
1040
1041 // command_queue_properties
1042 DECL_ATOM(out_of_order_exec_mode_enable);
1043 DECL_ATOM(profiling_enable);
1044
1045 // mem_flags
1046 // DECL_ATOM(read_write);
1047 DECL_ATOM(write_only);
1048 // DECL_ATOM(read_only);
1049 DECL_ATOM(use_host_ptr);
1050 DECL_ATOM(alloc_host_ptr);
1051 DECL_ATOM(copy_host_ptr);
1052
1053 // migration flags
1054 DECL_ATOM(host);
1055 DECL_ATOM(content_undefined);
1056
1057 // mem_object_type
1058 DECL_ATOM(buffer);
1059 DECL_ATOM(image2d);
1060 DECL_ATOM(image3d);
1061 // version1.2
1062 DECL_ATOM(image2d_array);
1063 DECL_ATOM(image1d);
1064 DECL_ATOM(image1d_array);
1065 DECL_ATOM(image1d_buffer);
1066
1067 // addressing_mode
1068 // DECL_ATOM(none);
1069 DECL_ATOM(clamp_to_edge);
1070 DECL_ATOM(clamp);
1071 DECL_ATOM(repeat);
1072
1073 // filter_mode
1074 DECL_ATOM(nearest);
1075 DECL_ATOM(linear);
1076
1077 // map_flags
1078 DECL_ATOM(read);
1079 DECL_ATOM(write);
1080
1081 // build_status
1082 DECL_ATOM(success);
1083 // DECL_ATOM(none);
1084 // DECL_ATOM(error);
1085 DECL_ATOM(in_progress);
1086
1087 // program_binary_type
1088 // DECL_ATOM(none);
1089 DECL_ATOM(compiled_object);
1090 DECL_ATOM(library);
1091 DECL_ATOM(executable);
1092
1093 // command_type
1094 DECL_ATOM(ndrange_kernel);
1095 DECL_ATOM(task);
1096 // DECL_ATOM(native_kernel);
1097 DECL_ATOM(read_buffer);
1098 DECL_ATOM(write_buffer);
1099 DECL_ATOM(copy_buffer);
1100 DECL_ATOM(read_image);
1101 DECL_ATOM(write_image);
1102 DECL_ATOM(copy_image);
1103 DECL_ATOM(copy_image_to_buffer);
1104 DECL_ATOM(copy_buffer_to_image);
1105 DECL_ATOM(map_buffer);
1106 DECL_ATOM(map_image);
1107 DECL_ATOM(unmap_mem_object);
1108 DECL_ATOM(marker);
1109 DECL_ATOM(aquire_gl_objects);
1110 DECL_ATOM(release_gl_objects);
1111 DECL_ATOM(migreate_mem_objects);
1112 DECL_ATOM(fill_buffer);
1113 DECL_ATOM(fill_image);
1114
1115 // execution_status
1116 DECL_ATOM(complete);
1117 DECL_ATOM(running);
1118 DECL_ATOM(submitted);
1119 DECL_ATOM(queued);
1120
1121 // arguments
1122 DECL_ATOM(region);
1123
1124 // DECL_ATOM(global);
1125 // DECL_ATOM(local);
1126 DECL_ATOM(constant);
1127 DECL_ATOM(private);
1128
1129 // DECL_ATOM(read_only);
1130 // DECL_ATOM(write_only);
1131 // DECL_ATOM(read_write);
1132 // DECL_ATOM(none);
1133
1134 // DECL_ATOM(none);
1135 DECL_ATOM(const);
1136 DECL_ATOM(restrict);
1137 DECL_ATOM(volatile);
1138
1139 DECL_ATOM(address_qualifier);
1140 DECL_ATOM(access_qualifier);
1141 DECL_ATOM(type_name);
1142 DECL_ATOM(type_qualifier);
1143 // DECL_ATOM(name);
1144
1145 #define SIZE_1 0x010000
1146 #define SIZE_2 0x020000
1147 #define SIZE_4 0x040000
1148 #define SIZE_8 0x080000
1149 #define SIZE_16 0x100000
1150
1151 ecl_kv_t kv_cl_type[] = {
1152 { &ATOM(char), SIZE_1 + OCL_CHAR },
1153 { &ATOM(char2), SIZE_2 + OCL_CHAR },
1154 { &ATOM(char4), SIZE_4 + OCL_CHAR },
1155 { &ATOM(char8), SIZE_8 + OCL_CHAR },
1156 { &ATOM(char16), SIZE_16 + OCL_CHAR },
1157 { &ATOM(uchar), SIZE_1 + OCL_UCHAR },
1158 { &ATOM(uchar2), SIZE_2 + OCL_UCHAR },
1159 { &ATOM(uchar4), SIZE_4 + OCL_UCHAR },
1160 { &ATOM(uchar8), SIZE_8 + OCL_UCHAR },
1161 { &ATOM(uchar16), SIZE_16 + OCL_UCHAR },
1162 { &ATOM(short), SIZE_1 + OCL_SHORT },
1163 { &ATOM(short2), SIZE_2 + OCL_SHORT },
1164 { &ATOM(short4), SIZE_4 + OCL_SHORT },
1165 { &ATOM(short8), SIZE_8 + OCL_SHORT },
1166 { &ATOM(short16), SIZE_16 + OCL_SHORT },
1167 { &ATOM(ushort), SIZE_1 + OCL_USHORT },
1168 { &ATOM(ushort2), SIZE_2 + OCL_USHORT },
1169 { &ATOM(ushort4), SIZE_4 + OCL_USHORT },
1170 { &ATOM(ushort8), SIZE_8 + OCL_USHORT },
1171 { &ATOM(ushort16), SIZE_16 + OCL_USHORT },
1172 { &ATOM(int), SIZE_1 + OCL_INT },
1173 { &ATOM(int2), SIZE_2 + OCL_INT },
1174 { &ATOM(int4), SIZE_4 + OCL_INT },
1175 { &ATOM(int8), SIZE_8 + OCL_INT },
1176 { &ATOM(int16), SIZE_16 + OCL_INT },
1177 { &ATOM(uint), SIZE_1 + OCL_UINT },
1178 { &ATOM(uint2), SIZE_2 + OCL_UINT },
1179 { &ATOM(uint4), SIZE_4 + OCL_UINT },
1180 { &ATOM(uint8), SIZE_8 + OCL_UINT },
1181 { &ATOM(uint16), SIZE_16 + OCL_UINT },
1182 { &ATOM(long), SIZE_1 + OCL_LONG },
1183 { &ATOM(long2), SIZE_2 + OCL_LONG },
1184 { &ATOM(long4), SIZE_4 + OCL_LONG },
1185 { &ATOM(long8), SIZE_8 + OCL_LONG },
1186 { &ATOM(long16), SIZE_16 + OCL_LONG },
1187 { &ATOM(ulong), SIZE_1 + OCL_ULONG },
1188 { &ATOM(ulong2), SIZE_2 + OCL_ULONG },
1189 { &ATOM(ulong4), SIZE_4 + OCL_ULONG },
1190 { &ATOM(ulong8), SIZE_8 + OCL_ULONG },
1191 { &ATOM(ulong16), SIZE_16 + OCL_ULONG },
1192 { &ATOM(half), SIZE_1 + OCL_HALF },
1193 { &ATOM(float), SIZE_1 + OCL_FLOAT },
1194 { &ATOM(float2), SIZE_2 + OCL_FLOAT },
1195 { &ATOM(float4), SIZE_4 + OCL_FLOAT },
1196 { &ATOM(float8), SIZE_8 + OCL_FLOAT },
1197 { &ATOM(float16), SIZE_16 + OCL_FLOAT },
1198 { &ATOM(double), SIZE_1 + OCL_DOUBLE },
1199 { &ATOM(double2), SIZE_2 + OCL_DOUBLE },
1200 { &ATOM(double4), SIZE_4 + OCL_DOUBLE },
1201 { &ATOM(double8), SIZE_8 + OCL_DOUBLE },
1202 { &ATOM(double16), SIZE_16 + OCL_DOUBLE },
1203 { 0, 0 }
1204 };
1205
1206 ecl_kv_t kv_device_type[] = { // bitfield
1207 { &ATOM(cpu), CL_DEVICE_TYPE_CPU },
1208 { &ATOM(gpu), CL_DEVICE_TYPE_GPU },
1209 { &ATOM(accelerator), CL_DEVICE_TYPE_ACCELERATOR },
1210 { &ATOM(default), CL_DEVICE_TYPE_DEFAULT },
1211 { &ATOM(all), CL_DEVICE_TYPE_ALL },
1212 #if CL_VERSION_1_2 == 1
1213 { &ATOM(custom), CL_DEVICE_TYPE_CUSTOM },
1214 #endif
1215 { 0, 0}
1216 };
1217
1218 ecl_kv_t kv_fp_config[] = { // bitfield
1219 { &ATOM(denorm), CL_FP_DENORM },
1220 { &ATOM(inf_nan), CL_FP_INF_NAN },
1221 { &ATOM(round_to_nearest), CL_FP_ROUND_TO_NEAREST },
1222 { &ATOM(round_to_zero), CL_FP_ROUND_TO_ZERO },
1223 { &ATOM(round_to_inf), CL_FP_ROUND_TO_INF },
1224 { &ATOM(fma), CL_FP_FMA },
1225 #if CL_VERSION_1_2 == 1
1226 { &ATOM(soft_float), CL_FP_SOFT_FLOAT },
1227 { &ATOM(correctly_rounded_divide_sqrt),CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT},
1228 #endif
1229 { 0, 0 }
1230 };
1231
1232 ecl_kv_t kv_mem_cache_type[] = { // enum
1233 { &ATOM(none), CL_NONE },
1234 { &ATOM(read_only), CL_READ_ONLY_CACHE },
1235 { &ATOM(read_write), CL_READ_WRITE_CACHE },
1236 { 0, 0 }
1237 };
1238
1239 ecl_kv_t kv_local_mem_type[] = { // enum
1240 { &ATOM(local), CL_LOCAL },
1241 { &ATOM(global), CL_GLOBAL },
1242 { 0, 0 }
1243 };
1244
1245 ecl_kv_t kv_exec_capabilities[] = { // bit field
1246 { &ATOM(kernel), CL_EXEC_KERNEL },
1247 { &ATOM(native_kernel), CL_EXEC_NATIVE_KERNEL },
1248 { 0, 0 }
1249 };
1250
1251
1252 ecl_kv_t kv_command_queue_properties[] = { // bit field
1253 { &ATOM(out_of_order_exec_mode_enable), CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE },
1254 { &ATOM(profiling_enable), CL_QUEUE_PROFILING_ENABLE },
1255 { 0, 0}
1256 };
1257
1258 ecl_kv_t kv_mem_flags[] = { // bit field
1259 { &ATOM(read_write), CL_MEM_READ_WRITE },
1260 { &ATOM(write_only), CL_MEM_WRITE_ONLY },
1261 { &ATOM(read_only), CL_MEM_READ_ONLY },
1262 { &ATOM(use_host_ptr), CL_MEM_USE_HOST_PTR },
1263 { &ATOM(alloc_host_ptr), CL_MEM_ALLOC_HOST_PTR },
1264 { &ATOM(copy_host_ptr), CL_MEM_COPY_HOST_PTR },
1265 { 0, 0 }
1266 };
1267
1268 #if CL_VERSION_1_2 == 1
1269 ecl_kv_t kv_migration_flags[] = { // bit field
1270 { &ATOM(host), CL_MIGRATE_MEM_OBJECT_HOST },
1271 { &ATOM(content_undefined), CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED},
1272 { 0, 0 }
1273 };
1274 #endif
1275
1276 ecl_kv_t kv_mem_object_type[] = { // enum
1277 { &ATOM(buffer), CL_MEM_OBJECT_BUFFER },
1278 { &ATOM(image2d), CL_MEM_OBJECT_IMAGE2D },
1279 { &ATOM(image3d), CL_MEM_OBJECT_IMAGE3D },
1280 #if CL_VERSION_1_2 == 1
1281 { &ATOM(image2d_array), CL_MEM_OBJECT_IMAGE2D_ARRAY },
1282 { &ATOM(image1d), CL_MEM_OBJECT_IMAGE1D },
1283 { &ATOM(image1d_array), CL_MEM_OBJECT_IMAGE1D_ARRAY },
1284 { &ATOM(image1d_buffer), CL_MEM_OBJECT_IMAGE1D_BUFFER },
1285 #endif
1286 { 0, 0 }
1287 };
1288
1289 ecl_kv_t kv_addressing_mode[] = { // enum
1290 { &ATOM(none), CL_ADDRESS_NONE },
1291 { &ATOM(clamp_to_edge), CL_ADDRESS_CLAMP_TO_EDGE },
1292 { &ATOM(clamp), CL_ADDRESS_CLAMP },
1293 { &ATOM(repeat), CL_ADDRESS_REPEAT },
1294 { 0, 0 }
1295 };
1296
1297 ecl_kv_t kv_filter_mode[] = { // enum
1298 { &ATOM(nearest), CL_FILTER_NEAREST },
1299 { &ATOM(linear), CL_FILTER_LINEAR },
1300 { 0, 0 }
1301 };
1302
1303 ecl_kv_t kv_map_flags[] = { // bitfield
1304 { &ATOM(read), CL_MAP_READ },
1305 { &ATOM(write), CL_MAP_WRITE },
1306 { 0, 0 }
1307 };
1308
1309 ecl_kv_t kv_build_status[] = { // enum
1310 { &ATOM(success), CL_BUILD_SUCCESS },
1311 { &ATOM(none), CL_BUILD_NONE },
1312 { &ATOM(error), CL_BUILD_ERROR },
1313 { &ATOM(in_progress), CL_BUILD_IN_PROGRESS },
1314 { 0, 0 }
1315 };
1316
1317 #if CL_VERSION_1_2 == 1
1318 ecl_kv_t kv_program_binary_type[] = { // enum
1319 { &ATOM(none), CL_PROGRAM_BINARY_TYPE_NONE },
1320 { &ATOM(compiled_object), CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT },
1321 { &ATOM(library), CL_PROGRAM_BINARY_TYPE_LIBRARY },
1322 { &ATOM(executable), CL_PROGRAM_BINARY_TYPE_EXECUTABLE },
1323 { 0, 0 }
1324 };
1325 #endif
1326
1327 ecl_kv_t kv_command_type[] = { // enum
1328 { &ATOM(ndrange_kernel), CL_COMMAND_NDRANGE_KERNEL },
1329 { &ATOM(task), CL_COMMAND_TASK },
1330 { &ATOM(native_kernel), CL_COMMAND_NATIVE_KERNEL },
1331 { &ATOM(read_buffer), CL_COMMAND_READ_BUFFER },
1332 { &ATOM(write_buffer), CL_COMMAND_WRITE_BUFFER },
1333 { &ATOM(copy_buffer), CL_COMMAND_COPY_BUFFER },
1334 { &ATOM(read_image), CL_COMMAND_READ_IMAGE },
1335 { &ATOM(write_image), CL_COMMAND_WRITE_IMAGE },
1336 { &ATOM(copy_image), CL_COMMAND_COPY_IMAGE },
1337 { &ATOM(copy_image_to_buffer), CL_COMMAND_COPY_IMAGE_TO_BUFFER },
1338 { &ATOM(copy_buffer_to_image), CL_COMMAND_COPY_BUFFER_TO_IMAGE },
1339 { &ATOM(map_buffer), CL_COMMAND_MAP_BUFFER },
1340 { &ATOM(map_image), CL_COMMAND_MAP_IMAGE },
1341 { &ATOM(unmap_mem_object), CL_COMMAND_UNMAP_MEM_OBJECT },
1342 { &ATOM(marker), CL_COMMAND_MARKER },
1343 { &ATOM(aquire_gl_objects), CL_COMMAND_ACQUIRE_GL_OBJECTS },
1344 { &ATOM(release_gl_objects), CL_COMMAND_RELEASE_GL_OBJECTS },
1345 #if CL_VERSION_12 == 1
1346 { &ATOM(migreate_mem_objects), CL_COMMAND_MIGRATE_MEM_OBJECTS },
1347 { &ATOM(fill_buffer), CL_COMMAND_FILL_BUFFER },
1348 { &ATOM(fill_image), CL_COMMAND_FILL_IMAGE },
1349 #endif
1350 { 0, 0}
1351 };
1352
1353 ecl_kv_t kv_execution_status[] = { // enum
1354 { &ATOM(complete), CL_COMPLETE }, // same as CL_SUCCESS
1355 { &ATOM(running), CL_RUNNING },
1356 { &ATOM(submitted), CL_SUBMITTED },
1357 { &ATOM(queued), CL_QUEUED },
1358 // the error codes (negative values)
1359 { &ATOM(device_not_found), CL_DEVICE_NOT_FOUND },
1360 { &ATOM(device_not_available), CL_DEVICE_NOT_AVAILABLE },
1361 { &ATOM(compiler_not_available), CL_COMPILER_NOT_AVAILABLE },
1362 { &ATOM(mem_object_allocation_failure), CL_MEM_OBJECT_ALLOCATION_FAILURE },
1363 { &ATOM(out_of_resources), CL_OUT_OF_RESOURCES },
1364 { &ATOM(out_of_host_memory), CL_OUT_OF_HOST_MEMORY },
1365 { &ATOM(profiling_info_not_available), CL_PROFILING_INFO_NOT_AVAILABLE },
1366 { &ATOM(mem_copy_overlap), CL_MEM_COPY_OVERLAP },
1367 { &ATOM(image_format_mismatch), CL_IMAGE_FORMAT_MISMATCH },
1368 { &ATOM(image_format_not_supported), CL_IMAGE_FORMAT_NOT_SUPPORTED },
1369 { &ATOM(build_program_failure), CL_BUILD_PROGRAM_FAILURE },
1370 { &ATOM(map_failure), CL_MAP_FAILURE },
1371 { &ATOM(invalid_value), CL_INVALID_VALUE },
1372 { &ATOM(invalid_device_type), CL_INVALID_DEVICE_TYPE },
1373 { &ATOM(invalid_platform), CL_INVALID_PLATFORM },
1374 { &ATOM(invalid_device), CL_INVALID_DEVICE },
1375 { &ATOM(invalid_context), CL_INVALID_CONTEXT },
1376 { &ATOM(invalid_queue_properties), CL_INVALID_QUEUE_PROPERTIES },
1377 { &ATOM(invalid_command_queue), CL_INVALID_COMMAND_QUEUE },
1378 { &ATOM(invalid_host_ptr), CL_INVALID_HOST_PTR },
1379 { &ATOM(invalid_mem_object), CL_INVALID_MEM_OBJECT },
1380 { &ATOM(invalid_image_format_descriptor), CL_INVALID_IMAGE_FORMAT_DESCRIPTOR },
1381 { &ATOM(invalid_image_size), CL_INVALID_IMAGE_SIZE },
1382 { &ATOM(invalid_sampler), CL_INVALID_SAMPLER },
1383 { &ATOM(invalid_binary), CL_INVALID_BINARY },
1384 { &ATOM(invalid_build_options), CL_INVALID_BUILD_OPTIONS },
1385 { &ATOM(invalid_program), CL_INVALID_PROGRAM },
1386 { &ATOM(invalid_program_executable), CL_INVALID_PROGRAM_EXECUTABLE },
1387 { &ATOM(invalid_kernel_name), CL_INVALID_KERNEL_NAME },
1388 { &ATOM(invalid_kernel_definition), CL_INVALID_KERNEL_DEFINITION },
1389 { &ATOM(invalid_kernel), CL_INVALID_KERNEL },
1390 { &ATOM(invalid_arg_index), CL_INVALID_ARG_INDEX },
1391 { &ATOM(invalid_arg_value), CL_INVALID_ARG_VALUE },
1392 { &ATOM(invalid_arg_size), CL_INVALID_ARG_SIZE },
1393 { &ATOM(invalid_kernel_args), CL_INVALID_KERNEL_ARGS },
1394 { &ATOM(invalid_work_dimension), CL_INVALID_WORK_DIMENSION },
1395 { &ATOM(invalid_work_group_size), CL_INVALID_WORK_GROUP_SIZE },
1396 { &ATOM(invalid_work_item_size), CL_INVALID_WORK_ITEM_SIZE },
1397 { &ATOM(invalid_global_offset), CL_INVALID_GLOBAL_OFFSET },
1398 { &ATOM(invalid_event_wait_list), CL_INVALID_EVENT_WAIT_LIST },
1399 { &ATOM(invalid_event), CL_INVALID_EVENT },
1400 { &ATOM(invalid_operation), CL_INVALID_OPERATION },
1401 { &ATOM(invalid_gl_object), CL_INVALID_GL_OBJECT },
1402 { &ATOM(invalid_buffer_size), CL_INVALID_BUFFER_SIZE },
1403 { &ATOM(invalid_mip_level), CL_INVALID_MIP_LEVEL },
1404 { &ATOM(invalid_global_work_size), CL_INVALID_GLOBAL_WORK_SIZE },
1405 { 0, 0 }
1406 };
1407
1408 DECL_ATOM(snorm_int8);
1409 DECL_ATOM(snorm_int16);
1410 DECL_ATOM(unorm_int8);
1411 DECL_ATOM(unorm_int16);
1412 DECL_ATOM(unorm_int24);
1413 DECL_ATOM(unorm_short_565);
1414 DECL_ATOM(unorm_short_555);
1415 DECL_ATOM(unorm_int_101010);
1416 DECL_ATOM(signed_int8);
1417 DECL_ATOM(signed_int16);
1418 DECL_ATOM(signed_int32);
1419 DECL_ATOM(unsigned_int8);
1420 DECL_ATOM(unsigned_int16);
1421 DECL_ATOM(unsigned_int32);
1422 DECL_ATOM(half_float);
1423 // DECL_ATOM(float);
1424
1425 ecl_kv_t kv_channel_type[] = { // enum
1426 { &ATOM(snorm_int8), CL_SNORM_INT8 },
1427 { &ATOM(snorm_int16), CL_SNORM_INT16 },
1428 { &ATOM(unorm_int8), CL_UNORM_INT8 },
1429 { &ATOM(unorm_int16), CL_UNORM_INT16 },
1430 { &ATOM(unorm_short_565), CL_UNORM_SHORT_565 },
1431 { &ATOM(unorm_short_555), CL_UNORM_SHORT_555 },
1432 { &ATOM(unorm_int_101010), CL_UNORM_INT_101010 },
1433 { &ATOM(signed_int8), CL_SIGNED_INT8 },
1434 { &ATOM(signed_int16), CL_SIGNED_INT16 },
1435 { &ATOM(signed_int32), CL_SIGNED_INT32 },
1436 { &ATOM(unsigned_int8), CL_UNSIGNED_INT8 },
1437 { &ATOM(unsigned_int16), CL_UNSIGNED_INT16 },
1438 { &ATOM(unsigned_int32), CL_UNSIGNED_INT32 },
1439 { &ATOM(half_float), CL_HALF_FLOAT },
1440 { &ATOM(float), CL_FLOAT },
1441 #if (CL_VERSION_1_2 == 1) && defined(CL_UNORM_INT24)
1442 { &ATOM(unorm_int24), CL_UNORM_INT24 },
1443 #endif
1444 { 0, 0 }
1445 };
1446
1447 // channel order
1448 DECL_ATOM(r);
1449 DECL_ATOM(a);
1450 DECL_ATOM(rg);
1451 DECL_ATOM(ra);
1452 DECL_ATOM(rgb);
1453 DECL_ATOM(rgba);
1454 DECL_ATOM(bgra);
1455 DECL_ATOM(argb);
1456 DECL_ATOM(intensity);
1457 DECL_ATOM(luminance);
1458 DECL_ATOM(rx);
1459 DECL_ATOM(rgx);
1460 DECL_ATOM(rgbx);
1461 // DECL_ATOM(depth);
1462 DECL_ATOM(depth_stencil);
1463
1464 // 1.1 features! in apple 1.0?
1465 #ifndef CL_Rx
1466 #define CL_Rx 0x10BA
1467 #endif
1468
1469 #ifndef CL_RGx
1470 #define CL_RGx 0x10BB
1471 #endif
1472
1473 #ifndef CL_RGBx
1474 #define CL_RGBx 0x10BC
1475 #endif
1476
1477 ecl_kv_t kv_channel_order[] = {
1478 { &ATOM(r), CL_R },
1479 { &ATOM(a), CL_A },
1480 { &ATOM(rg), CL_RG },
1481 { &ATOM(ra), CL_RA },
1482 { &ATOM(rgb), CL_RGB },
1483 { &ATOM(rgba), CL_RGBA },
1484 { &ATOM(bgra), CL_BGRA },
1485 { &ATOM(argb), CL_ARGB },
1486 { &ATOM(intensity), CL_INTENSITY },
1487 { &ATOM(luminance), CL_LUMINANCE },
1488 { &ATOM(rx), CL_Rx },
1489 { &ATOM(rgx), CL_RGx },
1490 { &ATOM(rgbx), CL_RGBx },
1491 #if CL_VERSION_1_2 == 1
1492 #if defined(CL_DEPTH)
1493 { &ATOM(depth), CL_DEPTH },
1494 #endif
1495 #if defined(CL_DEPTH_STENCIL)
1496 { &ATOM(depth_stencil), CL_DEPTH_STENCIL },
1497 #endif
1498 #endif
1499 { 0, 0 }
1500 };
1501
1502 // partition_property
1503 DECL_ATOM(equally);
1504 DECL_ATOM(by_counts);
1505 DECL_ATOM(by_counts_list_end);
1506 DECL_ATOM(by_affinity_domain);
1507
1508 #if CL_VERSION_1_2 == 1
1509 ecl_kv_t kv_device_partition_property[] = {
1510 { &ATOM(equally), CL_DEVICE_PARTITION_EQUALLY },
1511 { &ATOM(by_counts), CL_DEVICE_PARTITION_BY_COUNTS },
1512 { &ATOM(by_affinity_domain), CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN },
1513 { &ATOM(undefined), 0 },
1514 { 0, 0}
1515 };
1516 #endif
1517
1518 DECL_ATOM(numa);
1519 DECL_ATOM(l4_cache);
1520 DECL_ATOM(l3_cache);
1521 DECL_ATOM(l2_cache);
1522 DECL_ATOM(l1_cache);
1523 DECL_ATOM(next_partitionable);
1524
1525 #if CL_VERSION_1_2 == 1
1526 ecl_kv_t kv_device_affinity_domain[] = {
1527 { &ATOM(numa), CL_DEVICE_AFFINITY_DOMAIN_NUMA },
1528 { &ATOM(l4_cache), CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE },
1529 { &ATOM(l3_cache), CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE },
1530 { &ATOM(l2_cache), CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE },
1531 { &ATOM(l1_cache), CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE },
1532 { &ATOM(next_partitionable), CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE },
1533 { &ATOM(undefined), 0 },
1534 { 0, 0}
1535 };
1536 #endif
1537
1538 // Device info
1539 DECL_ATOM(type);
1540 DECL_ATOM(vendor_id);
1541 DECL_ATOM(max_compute_units);
1542 DECL_ATOM(max_work_item_dimensions);
1543 DECL_ATOM(max_work_group_size);
1544 DECL_ATOM(max_work_item_sizes);
1545 DECL_ATOM(preferred_vector_width_char);
1546 DECL_ATOM(preferred_vector_width_short);
1547 DECL_ATOM(preferred_vector_width_int);
1548 DECL_ATOM(preferred_vector_width_long);
1549 DECL_ATOM(preferred_vector_width_float);
1550 DECL_ATOM(preferred_vector_width_double);
1551 DECL_ATOM(max_clock_frequency);
1552 DECL_ATOM(address_bits);
1553 DECL_ATOM(max_read_image_args);
1554 DECL_ATOM(max_write_image_args);
1555 DECL_ATOM(max_mem_alloc_size);
1556 DECL_ATOM(image2d_max_width);
1557 DECL_ATOM(image2d_max_height);
1558 DECL_ATOM(image3d_max_width);
1559 DECL_ATOM(image3d_max_height);
1560 DECL_ATOM(image3d_max_depth);
1561 DECL_ATOM(image_support);
1562 DECL_ATOM(max_parameter_size);
1563 DECL_ATOM(max_samplers);
1564 DECL_ATOM(mem_base_addr_align);
1565 DECL_ATOM(min_data_type_align_size);
1566 DECL_ATOM(single_fp_config);
1567 DECL_ATOM(global_mem_cache_type);
1568 DECL_ATOM(global_mem_cacheline_size);
1569 DECL_ATOM(global_mem_cache_size);
1570 DECL_ATOM(global_mem_size);
1571 DECL_ATOM(max_constant_buffer_size);
1572 DECL_ATOM(max_constant_args);
1573 DECL_ATOM(local_mem_type);
1574 DECL_ATOM(local_mem_size);
1575 DECL_ATOM(error_correction_support);
1576 DECL_ATOM(profiling_timer_resolution);
1577 DECL_ATOM(endian_little);
1578 DECL_ATOM(available);
1579 DECL_ATOM(compiler_available);
1580 DECL_ATOM(execution_capabilities);
1581 DECL_ATOM(queue_properties);
1582 DECL_ATOM(name);
1583 DECL_ATOM(vendor);
1584 DECL_ATOM(driver_version);
1585 DECL_ATOM(profile);
1586 DECL_ATOM(version);
1587 DECL_ATOM(extensions);
1588 DECL_ATOM(platform);
1589
1590 // cl_khr_fp64 extension || CL_VERSION_1_2 == 1
1591 DECL_ATOM(double_fp_config);
1592 // cl_khr_fp16 extension || CL_VERSION_1_2 == 1
1593 DECL_ATOM(half_fp_config);
1594 // 1.2
1595 DECL_ATOM(preferred_vector_width_half);
1596 DECL_ATOM(host_unified_memory);
1597 DECL_ATOM(native_vector_width_char);
1598 DECL_ATOM(native_vector_width_short);
1599 DECL_ATOM(native_vector_width_int);
1600 DECL_ATOM(native_vector_width_long);
1601 DECL_ATOM(native_vector_width_float);
1602 DECL_ATOM(native_vector_width_double);
1603 DECL_ATOM(native_vector_width_half);
1604 DECL_ATOM(opencl_c_version);
1605 DECL_ATOM(linker_available);
1606 DECL_ATOM(built_in_kernels);
1607 DECL_ATOM(image_max_buffer_size);
1608 DECL_ATOM(image_max_array_size);
1609 DECL_ATOM(parent_device);
1610 DECL_ATOM(partition_max_sub_devices);
1611 DECL_ATOM(partition_properties);
1612 DECL_ATOM(partition_affinity_domain);
1613 DECL_ATOM(partition_type);
1614 // DECL_ATOM(reference_count);
1615 DECL_ATOM(preferred_interop_user_sync);
1616 DECL_ATOM(printf_buffer_size);
1617 DECL_ATOM(image_pitch_alignment);
1618 DECL_ATOM(image_base_address_alignment);
1619 // cl_nv_device_attribute_query extension
1620 DECL_ATOM(compute_capability_major_nv);
1621 DECL_ATOM(compute_capability_minor_nv);
1622 DECL_ATOM(registers_per_block_nv);
1623 DECL_ATOM(warp_size_nv);
1624 DECL_ATOM(gpu_overlap_nv);
1625 DECL_ATOM(kernel_exec_timeout_nv);
1626 DECL_ATOM(device_integrated_memory_nv);
1627
1628 // Map device info index 0...N => cl_device_info x Data type
1629 ecl_info_t device_info[] =
1630 {
1631 { &ATOM(type), CL_DEVICE_TYPE, false, OCL_DEVICE_TYPE, kv_device_type, 0 },
1632 { &ATOM(vendor_id), CL_DEVICE_VENDOR_ID, false, OCL_UINT, 0, 0 },
1633 { &ATOM(max_compute_units), CL_DEVICE_MAX_COMPUTE_UNITS, false, OCL_UINT, 0, 0 },
1634 { &ATOM(max_work_item_dimensions), CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, false, OCL_UINT, 0, 0 },
1635 { &ATOM(max_work_group_size), CL_DEVICE_MAX_WORK_GROUP_SIZE, false, OCL_SIZE, 0, 0 },
1636 { &ATOM(max_work_item_sizes), CL_DEVICE_MAX_WORK_ITEM_SIZES, true, OCL_SIZE, 0, 0 },
1637 { &ATOM(preferred_vector_width_char), CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, false, OCL_UINT, 0, 0 },
1638 { &ATOM(preferred_vector_width_short), CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, false, OCL_UINT, 0, 0 },
1639 { &ATOM(preferred_vector_width_int), CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, false, OCL_UINT, 0, 0 },
1640 { &ATOM(preferred_vector_width_long), CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, false,OCL_UINT, 0, 0 },
1641 { &ATOM(preferred_vector_width_float), CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, false, OCL_UINT, 0, 0 },
1642 { &ATOM(preferred_vector_width_double), CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, false, OCL_UINT, 0, 0 },
1643 { &ATOM(max_clock_frequency), CL_DEVICE_MAX_CLOCK_FREQUENCY, false, OCL_UINT, 0, 0 },
1644 { &ATOM(address_bits), CL_DEVICE_ADDRESS_BITS, false, OCL_UINT, 0, 0 },
1645 { &ATOM(max_read_image_args), CL_DEVICE_MAX_READ_IMAGE_ARGS, false, OCL_UINT, 0, 0 },
1646 { &ATOM(max_write_image_args), CL_DEVICE_MAX_WRITE_IMAGE_ARGS, false, OCL_UINT, 0, 0 },
1647 { &ATOM(max_mem_alloc_size), CL_DEVICE_MAX_MEM_ALLOC_SIZE, false, OCL_ULONG, 0, 0 },
1648 { &ATOM(image2d_max_width), CL_DEVICE_IMAGE2D_MAX_WIDTH, false, OCL_SIZE, 0, 0 },
1649 { &ATOM(image2d_max_height), CL_DEVICE_IMAGE2D_MAX_HEIGHT, false, OCL_SIZE, 0, 0 },
1650 { &ATOM(image3d_max_width), CL_DEVICE_IMAGE3D_MAX_WIDTH, false, OCL_SIZE, 0, 0 },
1651 { &ATOM(image3d_max_height), CL_DEVICE_IMAGE3D_MAX_HEIGHT, false, OCL_SIZE, 0, 0 },
1652 { &ATOM(image3d_max_depth), CL_DEVICE_IMAGE3D_MAX_DEPTH, false, OCL_SIZE, 0, 0 },
1653 { &ATOM(image_support), CL_DEVICE_IMAGE_SUPPORT, false, OCL_BOOL, 0, 0 },
1654 { &ATOM(max_parameter_size), CL_DEVICE_MAX_PARAMETER_SIZE, false, OCL_SIZE, 0, 0 },
1655 { &ATOM(max_samplers), CL_DEVICE_MAX_SAMPLERS, false, OCL_UINT, 0, 0 },
1656 { &ATOM(mem_base_addr_align), CL_DEVICE_MEM_BASE_ADDR_ALIGN, false, OCL_UINT, 0, 0 },
1657 { &ATOM(min_data_type_align_size), CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, false, OCL_UINT, 0, 0 },
1658 { &ATOM(single_fp_config), CL_DEVICE_SINGLE_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config, 0 },
1659 { &ATOM(global_mem_cache_type), CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, false, OCL_DEVICE_GLOBAL_MEM_CACHE_TYPE, kv_mem_cache_type, 0 },
1660 { &ATOM(global_mem_cacheline_size), CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, false, OCL_UINT, 0, 0 },
1661 { &ATOM(global_mem_cache_size), CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, false, OCL_ULONG, 0, 0 },
1662 { &ATOM(global_mem_size), CL_DEVICE_GLOBAL_MEM_SIZE, false, OCL_ULONG, 0, 0 },
1663 { &ATOM(max_constant_buffer_size), CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, false, OCL_ULONG, 0, 0 },
1664 { &ATOM(max_constant_args), CL_DEVICE_MAX_CONSTANT_ARGS, false, OCL_UINT, 0, 0 },
1665 { &ATOM(local_mem_type), CL_DEVICE_LOCAL_MEM_TYPE, false, OCL_DEVICE_LOCAL_MEM_TYPE, kv_local_mem_type, 0 },
1666 { &ATOM(local_mem_size), CL_DEVICE_LOCAL_MEM_SIZE, false, OCL_ULONG, 0, 0 },
1667 { &ATOM(error_correction_support), CL_DEVICE_ERROR_CORRECTION_SUPPORT, false, OCL_BOOL, 0, 0 },
1668 { &ATOM(profiling_timer_resolution), CL_DEVICE_PROFILING_TIMER_RESOLUTION, false, OCL_SIZE, 0, 0 },
1669 { &ATOM(endian_little), CL_DEVICE_ENDIAN_LITTLE, false, OCL_BOOL, 0, 0 },
1670 { &ATOM(available), CL_DEVICE_AVAILABLE, false, OCL_BOOL, 0, 0 },
1671 { &ATOM(compiler_available), CL_DEVICE_COMPILER_AVAILABLE, false, OCL_BOOL, 0, 0 },
1672 { &ATOM(execution_capabilities), CL_DEVICE_EXECUTION_CAPABILITIES, false, OCL_DEVICE_EXEC_CAPABILITIES, kv_exec_capabilities, 0 },
1673 { &ATOM(queue_properties), CL_DEVICE_QUEUE_PROPERTIES, false, OCL_QUEUE_PROPERTIES, kv_command_queue_properties, 0 },
1674 { &ATOM(name), CL_DEVICE_NAME, false, OCL_STRING, 0, 0 },
1675 { &ATOM(vendor), CL_DEVICE_VENDOR, false, OCL_STRING, 0, 0 },
1676 { &ATOM(driver_version), CL_DRIVER_VERSION, false, OCL_STRING, 0, 0 },
1677 { &ATOM(profile), CL_DEVICE_PROFILE, false, OCL_STRING, 0, 0 },
1678 { &ATOM(version), CL_DEVICE_VERSION, false, OCL_STRING, 0, 0 },
1679 { &ATOM(extensions), CL_DEVICE_EXTENSIONS, false, OCL_STRING, 0, 0 },
1680 { &ATOM(platform), CL_DEVICE_PLATFORM, false, OCL_PLATFORM, 0, 0 },
1681 #if CL_VERSION_1_1 == 1
1682 { &ATOM(preferred_vector_width_half), CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF,false, OCL_UINT, 0, 0 },
1683 { &ATOM(host_unified_memory), CL_DEVICE_HOST_UNIFIED_MEMORY,false,OCL_BOOL,0, 0},
1684 { &ATOM(native_vector_width_char), CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,false,OCL_UINT, 0, 0},
1685 { &ATOM(native_vector_width_short), CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,false,OCL_UINT, 0, 0},
1686 { &ATOM(native_vector_width_int), CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,false,OCL_UINT, 0, 0},
1687 { &ATOM(native_vector_width_long), CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,false,OCL_UINT, 0, 0},
1688 { &ATOM(native_vector_width_float), CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,false,OCL_UINT, 0, 0},
1689 { &ATOM(native_vector_width_double), CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,false,OCL_UINT, 0, 0},
1690 { &ATOM(native_vector_width_half), CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF,false,OCL_UINT, 0, 0},
1691 { &ATOM(opencl_c_version), CL_DEVICE_OPENCL_C_VERSION,false,OCL_STRING, 0, 0},
1692 #endif
1693 // cl_khr_fp64 extension || CL_VERSION_1_2 == 1
1694 #if CL_DEVICE_DOUBLE_FP_CONFIG
1695 { &ATOM(double_fp_config), CL_DEVICE_DOUBLE_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config, 0 },
1696 #endif
1697 // cl_khr_fp16 extension || CL_VERSION_1_2 == 1
1698 #if CL_DEVICE_HALF_FP_CONFIG
1699 { &ATOM(half_fp_config), CL_DEVICE_HALF_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config, 0 },
1700 #endif
1701 #if CL_VERSION_1_2 == 1
1702 { &ATOM(linker_available), CL_DEVICE_LINKER_AVAILABLE,false,OCL_BOOL, 0, 0},
1703 { &ATOM(built_in_kernels), CL_DEVICE_BUILT_IN_KERNELS,false, OCL_STRING, 0, 0},
1704 { &ATOM(image_max_buffer_size), CL_DEVICE_IMAGE_MAX_BUFFER_SIZE,false,OCL_SIZE, 0, 0},
1705 { &ATOM(image_max_array_size), CL_DEVICE_IMAGE_MAX_ARRAY_SIZE,false,OCL_SIZE, 0, 0},
1706 { &ATOM(parent_device), CL_DEVICE_PARENT_DEVICE,false,OCL_DEVICE, 0, 0},
1707 { &ATOM(partition_max_sub_devices), CL_DEVICE_PARTITION_MAX_SUB_DEVICES,false,OCL_SIZE, 0, 0},
1708 { &ATOM(partition_properties), CL_DEVICE_PARTITION_PROPERTIES,true,
1709 OCL_ENUM, kv_device_partition_property, 0},
1710
1711 { &ATOM(partition_affinity_domain), CL_DEVICE_PARTITION_AFFINITY_DOMAIN,false,OCL_ENUM, kv_device_affinity_domain, 0 },
1712
1713 { &ATOM(partition_type), CL_DEVICE_PARTITION_TYPE, false, OCL_DEVICE_PARTITION, 0, 0},
1714 { &ATOM(reference_count), CL_DEVICE_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1715 { &ATOM(preferred_interop_user_sync), CL_DEVICE_PREFERRED_INTEROP_USER_SYNC,false, OCL_BOOL, 0, 0},
1716 { &ATOM(printf_buffer_size), CL_DEVICE_PRINTF_BUFFER_SIZE,false, OCL_SIZE, 0, 0 },
1717 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1718 { &ATOM(image_pitch_alignment), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, false, OCL_SIZE, 0, 0 },
1719 #endif
1720 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1721 { &ATOM(image_base_address_alignment), CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, false, OCL_SIZE, 0, 0 },
1722 #endif
1723 #endif
1724
1725 // cl_nv_device_attribute_query extension
1726 #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
1727 { &ATOM(compute_capability_major_nv), CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, false, OCL_UINT, 0, 0},
1728 #endif
1729 #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV
1730 { &ATOM(compute_capability_minor_nv), CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, false, OCL_UINT, 0, 0},
1731 #endif
1732 #ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV
1733 { &ATOM(registers_per_block_nv),CL_DEVICE_REGISTERS_PER_BLOCK_NV, false, OCL_UINT, 0, 0},
1734 #endif
1735 #ifdef CL_DEVICE_WARP_SIZE_NV
1736 { &ATOM(warp_size_nv),CL_DEVICE_WARP_SIZE_NV, false, OCL_UINT, 0, 0},
1737 #endif
1738 #ifdef CL_DEVICE_GPU_OVERLAP_NV
1739 { &ATOM(gpu_overlap_nv),CL_DEVICE_GPU_OVERLAP_NV, false, OCL_BOOL, 0, 0},
1740 #endif
1741 #ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV
1742 { &ATOM(kernel_exec_timeout_nv), CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, false, OCL_BOOL, 0, 0},
1743 #endif
1744 #ifdef CL_DEVICE_INTEGRATED_MEMORY_NV
1745 { &ATOM(device_integrated_memory_nv),CL_DEVICE_INTEGRATED_MEMORY_NV, false, OCL_BOOL, 0, 0},
1746 #endif
1747
1748 };
1749
1750 // Map device info index 0...N => cl_device_info x Data type
1751 ecl_info_t platform_info[] =
1752 {
1753 { &ATOM(profile), CL_PLATFORM_PROFILE, false, OCL_STRING, 0, 0 },
1754 { &ATOM(version), CL_PLATFORM_VERSION, false, OCL_STRING, 0, 0 },
1755 { &ATOM(name), CL_PLATFORM_NAME, false, OCL_STRING, 0, 0 },
1756 { &ATOM(vendor), CL_PLATFORM_VENDOR, false, OCL_STRING, 0, 0 },
1757 { &ATOM(extensions), CL_PLATFORM_EXTENSIONS, false, OCL_STRING, 0, 0 }
1758 };
1759
1760 ecl_info_t context_info[] =
1761 {
1762 { &ATOM(reference_count), CL_CONTEXT_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1763 { &ATOM(devices), CL_CONTEXT_DEVICES, true, OCL_DEVICE, 0, 0 },
1764 { &ATOM(properties), CL_CONTEXT_PROPERTIES, true, OCL_INT, 0, 0 }
1765 };
1766
1767 ecl_info_t queue_info[] =
1768 {
1769 { &ATOM(context), CL_QUEUE_CONTEXT, false, OCL_CONTEXT, 0, 0 },
1770 { &ATOM(device), CL_QUEUE_DEVICE, false, OCL_DEVICE, 0, 0 },
1771 { &ATOM(reference_count), CL_QUEUE_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1772 { &ATOM(properties), CL_QUEUE_PROPERTIES, false, OCL_QUEUE_PROPERTIES, kv_command_queue_properties, 0 }
1773 };
1774
1775 ecl_info_t mem_info[] =
1776 {
1777 { &ATOM(object_type), CL_MEM_TYPE, false, OCL_MEM_OBJECT_TYPE, kv_mem_object_type, 0 },
1778 { &ATOM(flags), CL_MEM_FLAGS, false, OCL_MEM_FLAGS, kv_mem_flags, 0 },
1779 { &ATOM(size), CL_MEM_SIZE, false, OCL_SIZE, 0, 0 },
1780 // FIXME: pointer!! map it (binary resource?)
1781 { &ATOM(host_ptr), CL_MEM_HOST_PTR, false, OCL_POINTER, 0, 0 },
1782 { &ATOM(map_count), CL_MEM_MAP_COUNT, false, OCL_UINT, 0, 0 },
1783 { &ATOM(reference_count), CL_MEM_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1784 { &ATOM(context), CL_MEM_CONTEXT, false, OCL_CONTEXT, 0, 0 }
1785 };
1786
1787 ecl_info_t image_info[] =
1788 {
1789 { &ATOM(format), CL_IMAGE_FORMAT, false, OCL_IMAGE_FORMAT, 0, 0 },
1790 { &ATOM(element_size), CL_IMAGE_ELEMENT_SIZE, false, OCL_SIZE, 0, 0 },
1791 { &ATOM(row_pitch), CL_IMAGE_ROW_PITCH, false, OCL_SIZE, 0, 0 },
1792 { &ATOM(slice_pitch), CL_IMAGE_SLICE_PITCH, false, OCL_SIZE, 0, 0 },
1793 { &ATOM(width), CL_IMAGE_WIDTH, false, OCL_SIZE, 0, 0 },
1794 { &ATOM(height), CL_IMAGE_HEIGHT, false, OCL_SIZE, 0, 0 },
1795 { &ATOM(depth), CL_IMAGE_DEPTH, false, OCL_SIZE, 0, 0 }
1796 };
1797
1798 ecl_info_t sampler_info[] =
1799 {
1800 { &ATOM(reference_count), CL_SAMPLER_REFERENCE_COUNT, false, OCL_UINT, 0, 0},
1801 { &ATOM(context), CL_SAMPLER_CONTEXT, false, OCL_CONTEXT, 0, 0 },
1802 { &ATOM(normalized_coords), CL_SAMPLER_NORMALIZED_COORDS, false, OCL_BOOL, 0, 0 },
1803 { &ATOM(addressing_mode), CL_SAMPLER_ADDRESSING_MODE, false, OCL_SAMPLER_ADDRESSING_MODE, kv_addressing_mode, 0 },
1804 { &ATOM(filter_mode), CL_SAMPLER_FILTER_MODE, false, OCL_SAMPLER_FILTER_MODE, kv_filter_mode, 0 }
1805 };
1806
1807 ecl_info_t program_info[] = {
1808 { &ATOM(reference_count), CL_PROGRAM_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1809 { &ATOM(context), CL_PROGRAM_CONTEXT, false, OCL_CONTEXT, 0, 0 },
1810 { &ATOM(num_devices), CL_PROGRAM_NUM_DEVICES, false, OCL_UINT, 0, 0 },
1811 { &ATOM(devices), CL_PROGRAM_DEVICES, true, OCL_DEVICE, 0, 0 },
1812 { &ATOM(source), CL_PROGRAM_SOURCE, false, OCL_STRING, 0, 0 },
1813 { &ATOM(binary_sizes), CL_PROGRAM_BINARY_SIZES, true, OCL_SIZE, 0, 0 },
1814 { &ATOM(binaries), CL_PROGRAM_BINARIES, true, OCL_STRING, 0, 0 }
1815 };
1816
1817 ecl_info_t build_info[] = {
1818 { &ATOM(status), CL_PROGRAM_BUILD_STATUS, false, OCL_BUILD_STATUS, kv_build_status, 0 },
1819 { &ATOM(options), CL_PROGRAM_BUILD_OPTIONS, false, OCL_STRING, 0, 0 },
1820 { &ATOM(log), CL_PROGRAM_BUILD_LOG, false, OCL_STRING, 0, 0 },
1821 #if CL_VERSION_1_2 == 1
1822 { &ATOM(binary_type), CL_PROGRAM_BINARY_TYPE, false, OCL_PROGRAM_BINARY_TYPE, kv_program_binary_type, 0 },
1823 #endif
1824 };
1825
1826 ecl_info_t kernel_info[] = {
1827 { &ATOM(function_name), CL_KERNEL_FUNCTION_NAME, false, OCL_STRING, 0, 0 },
1828 { &ATOM(num_args), CL_KERNEL_NUM_ARGS, false, OCL_UINT, 0, 0 },
1829 { &ATOM(reference_count), CL_KERNEL_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1830 { &ATOM(context), CL_KERNEL_CONTEXT, false, OCL_CONTEXT, 0, 0 },
1831 { &ATOM(program), CL_KERNEL_PROGRAM, false, OCL_PROGRAM, 0, 0 }
1832 };
1833
1834 ecl_info_t workgroup_info[] = {
1835 { &ATOM(work_group_size), CL_KERNEL_WORK_GROUP_SIZE, false, OCL_SIZE, 0, sizeof(size_t)},
1836 { &ATOM(compile_work_group_size), CL_KERNEL_COMPILE_WORK_GROUP_SIZE, true, OCL_SIZE, 0, sizeof(size_t[3])},
1837 { &ATOM(local_mem_size), CL_KERNEL_LOCAL_MEM_SIZE, false, OCL_ULONG, 0, sizeof(cl_ulong)},
1838 #if CL_VERSION_1_1 == 1
1839 { &ATOM(preferred_work_group_size_multiple), CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, false, OCL_SIZE, 0, sizeof(size_t) },
1840 { &ATOM(private_mem_size), CL_KERNEL_PRIVATE_MEM_SIZE, false, OCL_ULONG, 0, sizeof(cl_ulong)},
1841 #endif
1842 #if CL_VERSION_1_2 == 1
1843 { &ATOM(global_work_size), CL_KERNEL_GLOBAL_WORK_SIZE, true, OCL_SIZE, 0, sizeof(size_t[3])},
1844 #endif
1845 };
1846
1847 ecl_info_t event_info[] = {
1848 { &ATOM(command_queue), CL_EVENT_COMMAND_QUEUE, false, OCL_COMMAND_QUEUE, 0, 0 },
1849 { &ATOM(command_type), CL_EVENT_COMMAND_TYPE, false, OCL_ENUM, kv_command_type, 0 },
1850 { &ATOM(reference_count), CL_EVENT_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },
1851 { &ATOM(execution_status), CL_EVENT_COMMAND_EXECUTION_STATUS, false, OCL_ENUM, kv_execution_status, 0 }
1852 };
1853
1854 // clGetKernelArgInfo 1.2
1855 #if CL_VERSION_1_2 == 1
1856
1857 ecl_kv_t kv_address_qualifier[] = {
1858 { &ATOM(global), CL_KERNEL_ARG_ADDRESS_GLOBAL },
1859 { &ATOM(local), CL_KERNEL_ARG_ADDRESS_LOCAL },
1860 { &ATOM(constant), CL_KERNEL_ARG_ADDRESS_CONSTANT },
1861 { &ATOM(private), CL_KERNEL_ARG_ADDRESS_PRIVATE },
1862 { 0, 0 }
1863 };
1864
1865 ecl_kv_t kv_access_qualifier[] = {
1866 { &ATOM(read_only), CL_KERNEL_ARG_ACCESS_READ_ONLY },
1867 { &ATOM(write_only), CL_KERNEL_ARG_ACCESS_WRITE_ONLY },
1868 { &ATOM(read_write), CL_KERNEL_ARG_ACCESS_READ_WRITE },
1869 { &ATOM(none), CL_KERNEL_ARG_ACCESS_NONE },
1870 { 0, 0 }
1871 };
1872
1873 ecl_kv_t kv_type_qualifier[] = {
1874 { &ATOM(none), CL_KERNEL_ARG_TYPE_NONE },
1875 { &ATOM(const), CL_KERNEL_ARG_TYPE_CONST },
1876 { &ATOM(restrict), CL_KERNEL_ARG_TYPE_RESTRICT },
1877 { &ATOM(volatile), CL_KERNEL_ARG_TYPE_VOLATILE },
1878 { 0, 0 }
1879 };
1880
1881 ecl_info_t arg_info[] = {
1882 { &ATOM(address_qualifier), CL_KERNEL_ARG_ADDRESS_QUALIFIER, false, OCL_ENUM, kv_address_qualifier },
1883 { &ATOM(access_qualifier), CL_KERNEL_ARG_ACCESS_QUALIFIER, false, OCL_ENUM, kv_access_qualifier },
1884 { &ATOM(type_name), CL_KERNEL_ARG_TYPE_NAME, false, OCL_STRING, 0 },
1885 { &ATOM(type_qualifier), CL_KERNEL_ARG_TYPE_QUALIFIER, false, OCL_ENUM, kv_type_qualifier },
1886 { &ATOM(name), CL_KERNEL_ARG_NAME, false, OCL_STRING, 0 },
1887 };
1888 #endif
1889
1890 // Error reasons
ecl_error(cl_int err)1891 ERL_NIF_TERM ecl_error(cl_int err)
1892 {
1893 switch(err) {
1894 case CL_DEVICE_NOT_FOUND:
1895 return ATOM(device_not_found);
1896 case CL_DEVICE_NOT_AVAILABLE:
1897 return ATOM(device_not_available);
1898 case CL_COMPILER_NOT_AVAILABLE:
1899 return ATOM(compiler_not_available);
1900 case CL_MEM_OBJECT_ALLOCATION_FAILURE:
1901 return ATOM(mem_object_allocation_failure);
1902 case CL_OUT_OF_RESOURCES:
1903 return ATOM(out_of_resources);
1904 case CL_OUT_OF_HOST_MEMORY:
1905 return ATOM(out_of_host_memory);
1906 case CL_PROFILING_INFO_NOT_AVAILABLE:
1907 return ATOM(profiling_info_not_available);
1908 case CL_MEM_COPY_OVERLAP:
1909 return ATOM(mem_copy_overlap);
1910 case CL_IMAGE_FORMAT_MISMATCH:
1911 return ATOM(image_format_mismatch);
1912 case CL_IMAGE_FORMAT_NOT_SUPPORTED:
1913 return ATOM(image_format_not_supported);
1914 case CL_BUILD_PROGRAM_FAILURE:
1915 return ATOM(build_program_failure);
1916 case CL_MAP_FAILURE:
1917 return ATOM(map_failure);
1918 case CL_INVALID_VALUE:
1919 return ATOM(invalid_value);
1920 case CL_INVALID_DEVICE_TYPE:
1921 return ATOM(invalid_device_type);
1922 case CL_INVALID_PLATFORM:
1923 return ATOM(invalid_platform);
1924 case CL_INVALID_DEVICE:
1925 return ATOM(invalid_device);
1926 case CL_INVALID_CONTEXT:
1927 return ATOM(invalid_context);
1928 case CL_INVALID_QUEUE_PROPERTIES:
1929 return ATOM(invalid_queue_properties);
1930 case CL_INVALID_COMMAND_QUEUE:
1931 return ATOM(invalid_command_queue);
1932 case CL_INVALID_HOST_PTR:
1933 return ATOM(invalid_host_ptr);
1934 case CL_INVALID_MEM_OBJECT:
1935 return ATOM(invalid_mem_object);
1936 case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
1937 return ATOM(invalid_image_format_descriptor);
1938 case CL_INVALID_IMAGE_SIZE:
1939 return ATOM(invalid_image_size);
1940 case CL_INVALID_SAMPLER:
1941 return ATOM(invalid_sampler);
1942 case CL_INVALID_BINARY:
1943 return ATOM(invalid_binary);
1944 case CL_INVALID_BUILD_OPTIONS:
1945 return ATOM(invalid_build_options);
1946 case CL_INVALID_PROGRAM:
1947 return ATOM(invalid_program);
1948 case CL_INVALID_PROGRAM_EXECUTABLE:
1949 return ATOM(invalid_program_executable);
1950 case CL_INVALID_KERNEL_NAME:
1951 return ATOM(invalid_kernel_name);
1952 case CL_INVALID_KERNEL_DEFINITION:
1953 return ATOM(invalid_kernel_definition);
1954 case CL_INVALID_KERNEL:
1955 return ATOM(invalid_kernel);
1956 case CL_INVALID_ARG_INDEX:
1957 return ATOM(invalid_arg_index);
1958 case CL_INVALID_ARG_VALUE:
1959 return ATOM(invalid_arg_value);
1960 case CL_INVALID_ARG_SIZE:
1961 return ATOM(invalid_arg_size);
1962 case CL_INVALID_KERNEL_ARGS:
1963 return ATOM(invalid_kernel_args);
1964 case CL_INVALID_WORK_DIMENSION:
1965 return ATOM(invalid_work_dimension);
1966 case CL_INVALID_WORK_GROUP_SIZE:
1967 return ATOM(invalid_work_group_size);
1968 case CL_INVALID_WORK_ITEM_SIZE:
1969 return ATOM(invalid_work_item_size);
1970 case CL_INVALID_GLOBAL_OFFSET:
1971 return ATOM(invalid_global_offset);
1972 case CL_INVALID_EVENT_WAIT_LIST:
1973 return ATOM(invalid_event_wait_list);
1974 case CL_INVALID_EVENT:
1975 return ATOM(invalid_event);
1976 case CL_INVALID_OPERATION:
1977 return ATOM(invalid_operation);
1978 case CL_INVALID_GL_OBJECT:
1979 return ATOM(invalid_gl_object);
1980 case CL_INVALID_BUFFER_SIZE:
1981 return ATOM(invalid_buffer_size);
1982 case CL_INVALID_MIP_LEVEL:
1983 return ATOM(invalid_mip_level);
1984 case CL_INVALID_GLOBAL_WORK_SIZE:
1985 return ATOM(invalid_global_work_size);
1986 default:
1987 return ATOM(unknown);
1988 }
1989 }
1990
ecl_make_error(ErlNifEnv * env,cl_int err)1991 ERL_NIF_TERM ecl_make_error(ErlNifEnv* env, cl_int err)
1992 {
1993 return enif_make_tuple2(env, ATOM(error), ecl_error(err));
1994 }
1995
ecl_emit_error(char * file,int line,...)1996 static void ecl_emit_error(char* file, int line, ...)
1997 {
1998 va_list ap;
1999 char* fmt;
2000
2001 va_start(ap, line);
2002 fmt = va_arg(ap, char*);
2003
2004 fprintf(stderr, "%s:%d: ", file, line);
2005 vfprintf(stderr, fmt, ap);
2006 fprintf(stderr, "\r\n");
2007 va_end(ap);
2008 fflush(stderr);
2009 }
2010
2011 // Parse bool
get_bool(ErlNifEnv * env,const ERL_NIF_TERM key,cl_bool * val)2012 static int get_bool(ErlNifEnv* env, const ERL_NIF_TERM key, cl_bool* val)
2013 {
2014 UNUSED(env);
2015 if (key == ATOM(true)) {
2016 *val = true;
2017 return 1;
2018 }
2019 else if (key == ATOM(false)) {
2020 *val = false;
2021 return 1;
2022 }
2023 return 0;
2024 }
2025
2026
2027 // Parse enum
get_enum(ErlNifEnv * env,const ERL_NIF_TERM key,cl_uint * num,ecl_kv_t * kv)2028 static int get_enum(ErlNifEnv* env, const ERL_NIF_TERM key,
2029 cl_uint* num, ecl_kv_t* kv)
2030 {
2031 UNUSED(env);
2032
2033 if (!enif_is_atom(env, key))
2034 return 0;
2035 while(kv->key) {
2036 if (*kv->key == key) {
2037 *num = (cl_uint) kv->value;
2038 return 1;
2039 }
2040 kv++;
2041 }
2042 return 0;
2043 }
2044
2045 // Parse bitfield
get_bitfield(ErlNifEnv * env,const ERL_NIF_TERM key,cl_bitfield * field,ecl_kv_t * kv)2046 static int get_bitfield(ErlNifEnv* env, const ERL_NIF_TERM key,
2047 cl_bitfield* field, ecl_kv_t* kv)
2048 {
2049 UNUSED(env);
2050
2051 if (!enif_is_atom(env, key))
2052 return 0;
2053 while(kv->key) {
2054 if (*kv->key == key) {
2055 *field = kv->value;
2056 return 1;
2057 }
2058 kv++;
2059 }
2060 return 0;
2061 }
2062
2063
get_bitfields(ErlNifEnv * env,const ERL_NIF_TERM term,cl_bitfield * field,ecl_kv_t * kv)2064 static int get_bitfields(ErlNifEnv* env, const ERL_NIF_TERM term,
2065 cl_bitfield* field, ecl_kv_t* kv)
2066 {
2067 cl_bitfield t;
2068
2069 if (enif_is_atom(env, term)) {
2070 if (!get_bitfield(env, term, &t, kv))
2071 return 0;
2072 *field = t;
2073 return 1;
2074 }
2075 else if (enif_is_empty_list(env, term)) {
2076 *field = 0;
2077 return 1;
2078 }
2079 else if (enif_is_list(env, term)) {
2080 cl_bitfield fs = 0;
2081 ERL_NIF_TERM list = term;
2082 ERL_NIF_TERM head, tail;
2083
2084 while(enif_get_list_cell(env, list, &head, &tail)) {
2085 if (!get_bitfield(env, head, &t, kv))
2086 return 0;
2087 fs |= t;
2088 list = tail;
2089 }
2090 if (!enif_is_empty_list(env, list))
2091 return 0;
2092 *field = fs;
2093 return 1;
2094 }
2095 return 0;
2096 }
2097
make_enum(ErlNifEnv * env,cl_uint num,ecl_kv_t * kv)2098 ERL_NIF_TERM make_enum(ErlNifEnv* env, cl_uint num, ecl_kv_t* kv)
2099 {
2100 while(kv->key) {
2101 if (num == (cl_uint)kv->value)
2102 return *kv->key;
2103 kv++;
2104 }
2105 return enif_make_uint(env, num);
2106 }
2107
make_bitfields(ErlNifEnv * env,cl_bitfield v,ecl_kv_t * kv)2108 ERL_NIF_TERM make_bitfields(ErlNifEnv* env, cl_bitfield v, ecl_kv_t* kv)
2109 {
2110 ERL_NIF_TERM list = enif_make_list(env, 0);
2111
2112 if (v) {
2113 int n = 0;
2114 while(kv->key) {
2115 kv++;
2116 n++;
2117 }
2118 while(n--) {
2119 kv--;
2120 if ((kv->value & v) == kv->value)
2121 list = enif_make_list_cell(env, *kv->key, list);
2122 }
2123 }
2124 return list;
2125 }
2126
2127
2128
2129 /******************************************************************************
2130 *
2131 * Linear hash functions
2132 *
2133 *****************************************************************************/
2134
2135 #define EPTR_HANDLE(ptr) ((intptr_t)(ptr))
2136
ref_hash(void * key)2137 static lhash_value_t ref_hash(void* key)
2138 {
2139 return (lhash_value_t) key;
2140 }
2141
ref_cmp(void * key,void * data)2142 static int ref_cmp(void* key, void* data)
2143 {
2144 if (((intptr_t)key) == EPTR_HANDLE(((ecl_object_t*)data)->opaque))
2145 return 0;
2146 return 1;
2147 }
2148
ref_release(void * data)2149 static void ref_release(void *data)
2150 {
2151 UNUSED(data);
2152 // object's are free'd by garbage collection
2153 }
2154
2155 // Remove object from hash
object_erase(ecl_object_t * obj)2156 static void object_erase(ecl_object_t* obj)
2157 {
2158 ecl_env_t* ecl = obj->env;
2159 enif_rwlock_rwlock(ecl->ref_lock);
2160 lhash_erase(&ecl->ref, (void*)EPTR_HANDLE(obj->opaque));
2161 enif_rwlock_rwunlock(ecl->ref_lock);
2162 }
2163
2164 /******************************************************************************
2165 *
2166 * Message queue
2167 *
2168 *****************************************************************************/
2169
2170 // Peek at queue front
2171 #if 0
2172 static ecl_message_t* ecl_queue_peek(ecl_queue_t* q)
2173 {
2174 ecl_qlink_t* ql;
2175
2176 enif_mutex_lock(q->mtx);
2177 ql = q->front;
2178 enif_mutex_unlock(q->mtx);
2179 if (ql)
2180 return &ql->mesg;
2181 else
2182 return 0;
2183 }
2184 #endif
2185
2186 // Get message from queue front
ecl_queue_get(ecl_queue_t * q,ecl_message_t * m)2187 static int ecl_queue_get(ecl_queue_t* q, ecl_message_t* m)
2188 {
2189 ecl_qlink_t* ql;
2190
2191 enif_mutex_lock(q->mtx);
2192 while(!(ql = q->front)) {
2193 enif_cond_wait(q->cv, q->mtx);
2194 }
2195 if (!(q->front = ql->next))
2196 q->rear = 0;
2197 q->len--;
2198
2199 *m = ql->mesg;
2200
2201 if ((ql >= &q->ql[0]) && (ql <= &q->ql[MAX_QLINK-1])) {
2202 ql->next = q->free;
2203 q->free = ql;
2204 }
2205 else
2206 enif_free(ql);
2207 enif_mutex_unlock(q->mtx);
2208 return 0;
2209 }
2210
2211 // Put message at queue rear
ecl_queue_put(ecl_queue_t * q,ecl_message_t * m)2212 static int ecl_queue_put(ecl_queue_t* q, ecl_message_t* m)
2213 {
2214 ecl_qlink_t* ql;
2215 ecl_qlink_t* qr;
2216 int res = 0;
2217
2218 enif_mutex_lock(q->mtx);
2219
2220 if ((ql = q->free))
2221 q->free = ql->next;
2222 else
2223 ql = enif_alloc(sizeof(ecl_qlink_t));
2224 if (!ql)
2225 res = -1;
2226 else {
2227 ql->mesg = *m;
2228 q->len++;
2229 ql->next = 0;
2230 if (!(qr = q->rear)) {
2231 q->front = ql;
2232 enif_cond_signal(q->cv);
2233 }
2234 else
2235 qr->next = ql;
2236 q->rear = ql;
2237 }
2238 enif_mutex_unlock(q->mtx);
2239 return res;
2240 }
2241
ecl_queue_init(ecl_queue_t * q)2242 static int ecl_queue_init(ecl_queue_t* q)
2243 {
2244 int i;
2245 if (!(q->cv = enif_cond_create("queue_cv")))
2246 return -1;
2247 if (!(q->mtx = enif_mutex_create("queue_mtx")))
2248 return -1;
2249 q->front = 0;
2250 q->rear = 0;
2251 q->len = 0;
2252 for (i = 0; i < MAX_QLINK-1; i++)
2253 q->ql[i].next = &q->ql[i+1];
2254 q->ql[MAX_QLINK-1].next = 0;
2255 q->free = &q->ql[0];
2256 return 0;
2257 }
2258
ecl_queue_destroy(ecl_queue_t * q)2259 static void ecl_queue_destroy(ecl_queue_t* q)
2260 {
2261 ecl_qlink_t* ql;
2262
2263 enif_cond_destroy(q->cv);
2264 enif_mutex_destroy(q->mtx);
2265
2266 ql = q->front;
2267 while(ql) {
2268 ecl_qlink_t* qln = ql->next;
2269 if ((ql >= &q->ql[0]) && (ql <= &q->ql[MAX_QLINK-1]))
2270 ;
2271 else
2272 enif_free(ql);
2273 ql = qln;
2274 }
2275 }
2276
2277 /******************************************************************************
2278 *
2279 * Threads
2280 *
2281 *****************************************************************************/
2282
ecl_message_send(ecl_thread_t * thr,ecl_message_t * m)2283 static int ecl_message_send(ecl_thread_t* thr, ecl_message_t* m)
2284 {
2285 return ecl_queue_put(&thr->q, m);
2286 }
2287
ecl_message_recv(ecl_thread_t * thr,ecl_message_t * m)2288 static int ecl_message_recv(ecl_thread_t* thr, ecl_message_t* m)
2289 {
2290 int r;
2291 if ((r = ecl_queue_get(&thr->q, m)) < 0)
2292 return r;
2293 return 0;
2294 }
2295
2296 #if 0
2297 static ecl_message_t* ecl_message_peek(ecl_thread_t* thr, ecl_thread_t** from)
2298 {
2299 ecl_message_t* m;
2300 if ((m = ecl_queue_peek(&thr->q))) {
2301 if (from)
2302 *from = m->sender;
2303 }
2304 return m;
2305 }
2306 #endif
2307
ecl_thread_start(void * (* func)(void * arg),void * arg,int stack_size)2308 static ecl_thread_t* ecl_thread_start(void* (*func)(void* arg),
2309 void* arg, int stack_size)
2310 {
2311 ErlNifThreadOpts* opts;
2312 ecl_thread_t* thr;
2313
2314 if (!(thr = enif_alloc(sizeof(ecl_thread_t))))
2315 return 0;
2316 if (ecl_queue_init(&thr->q) < 0)
2317 goto error;
2318 if (!(opts = enif_thread_opts_create("ecl_thread_opts")))
2319 goto error;
2320 opts->suggested_stack_size = stack_size;
2321 thr->arg = arg;
2322
2323 enif_thread_create("ecl_thread", &thr->tid, func, thr, opts);
2324 enif_thread_opts_destroy(opts);
2325 return thr;
2326 error:
2327 enif_free(thr);
2328 return 0;
2329 }
2330
ecl_thread_stop(ecl_thread_t * thr,void ** exit_value)2331 static int ecl_thread_stop(ecl_thread_t* thr, void** exit_value)
2332 {
2333 ecl_message_t m;
2334
2335 m.type = ECL_MESSAGE_STOP;
2336 m.env = 0;
2337 ecl_message_send(thr, &m);
2338 enif_thread_join(thr->tid, exit_value);
2339 ecl_queue_destroy(&thr->q);
2340 enif_free(thr);
2341 return 0;
2342 }
2343
ecl_thread_exit(void * value)2344 static void ecl_thread_exit(void* value)
2345 {
2346 enif_thread_exit(value);
2347 }
2348
2349 /******************************************************************************
2350 *
2351 * Ecl resource
2352 *
2353 *****************************************************************************/
2354
ecl_resource_init(ErlNifEnv * env,ecl_resource_t * res,char * name,size_t size,void (* dtor)(ErlNifEnv *,ecl_object_t *),ErlNifResourceFlags flags,ErlNifResourceFlags * tried)2355 static int ecl_resource_init(ErlNifEnv* env,
2356 ecl_resource_t* res,
2357 char* name,
2358 size_t size, // object size
2359 void (*dtor)(ErlNifEnv*, ecl_object_t*),
2360 ErlNifResourceFlags flags,
2361 ErlNifResourceFlags* tried)
2362 {
2363 res->name = name;
2364 res->type = enif_make_atom(env, name);
2365 res->size = size;
2366 res->res = enif_open_resource_type(env, 0, name,
2367 (ErlNifResourceDtor*) dtor,
2368 flags, tried);
2369 return 0;
2370 }
2371
2372 //
2373 // Reference new kernel argument and Dereference old value
2374 //
2375
unref_kernel_arg(int type,void * val)2376 static void unref_kernel_arg(int type, void* val)
2377 {
2378 switch(type) {
2379 case KERNEL_ARG_MEM:
2380 if (val)
2381 clReleaseMemObject((cl_mem) val);
2382 break;
2383 case KERNEL_ARG_SAMPLER:
2384 if (val)
2385 clReleaseSampler((cl_sampler) val);
2386 break;
2387 case KERNEL_ARG_OTHER:
2388 default:
2389 break;
2390 }
2391 }
2392
ref_kernel_arg(int type,void * val)2393 static void ref_kernel_arg(int type, void* val)
2394 {
2395 switch(type) {
2396 case KERNEL_ARG_MEM:
2397 if (val)
2398 clRetainMemObject((cl_mem) val);
2399 break;
2400 case KERNEL_ARG_SAMPLER:
2401 if (val)
2402 clRetainSampler((cl_sampler) val);
2403 break;
2404 case KERNEL_ARG_OTHER:
2405 default:
2406 break;
2407 }
2408 }
2409
set_kernel_arg(ecl_kernel_t * kern,cl_uint i,int type,void * value)2410 static int set_kernel_arg(ecl_kernel_t* kern, cl_uint i, int type, void* value)
2411 {
2412 if (i < kern->num_args) {
2413 int old_type = kern->arg[i].type;
2414 void* old_value = kern->arg[i].value;
2415 ref_kernel_arg(type, value);
2416 kern->arg[i].type = type;
2417 kern->arg[i].value = value;
2418 unref_kernel_arg(old_type, old_value);
2419 return 0;
2420 }
2421 return -1;
2422 }
2423
2424 /******************************************************************************
2425 *
2426 * Resource destructors
2427 *
2428 *****************************************************************************/
2429
ecl_platform_dtor(ErlNifEnv * env,ecl_object_t * obj)2430 static void ecl_platform_dtor(ErlNifEnv* env, ecl_object_t* obj)
2431 {
2432 UNUSED(env);
2433 UNUSED(obj);
2434 DBG("ecl_platform_dtor: %p", obj);
2435 object_erase(obj);
2436 if (obj->parent) enif_release_resource(obj->parent);
2437 }
2438
ecl_device_dtor(ErlNifEnv * env,ecl_object_t * obj)2439 static void ecl_device_dtor(ErlNifEnv* env, ecl_object_t* obj)
2440 {
2441 UNUSED(env);
2442 UNUSED(obj);
2443 DBG("ecl_device_dtor: %p", obj);
2444 object_erase(obj);
2445 if (obj->parent) enif_release_resource(obj->parent);
2446 }
2447
ecl_queue_dtor(ErlNifEnv * env,ecl_object_t * obj)2448 static void ecl_queue_dtor(ErlNifEnv* env, ecl_object_t* obj)
2449 {
2450 UNUSED(env);
2451 DBG("ecl_queue_dtor: %p", obj);
2452 clReleaseCommandQueue(obj->queue);
2453 object_erase(obj);
2454 if (obj->parent) enif_release_resource(obj->parent);
2455 }
2456
ecl_mem_dtor(ErlNifEnv * env,ecl_object_t * obj)2457 static void ecl_mem_dtor(ErlNifEnv* env, ecl_object_t* obj)
2458 {
2459 UNUSED(env);
2460 DBG("ecl_mem_dtor: %p", obj);
2461 clReleaseMemObject(obj->mem);
2462 object_erase(obj);
2463 if (obj->parent) enif_release_resource(obj->parent);
2464 }
2465
ecl_sampler_dtor(ErlNifEnv * env,ecl_object_t * obj)2466 static void ecl_sampler_dtor(ErlNifEnv* env, ecl_object_t* obj)
2467 {
2468 UNUSED(env);
2469 DBG("ecl_sampler_dtor: %p", obj);
2470 clReleaseSampler(obj->sampler);
2471 object_erase(obj);
2472 if (obj->parent) enif_release_resource(obj->parent);
2473 }
2474
ecl_program_dtor(ErlNifEnv * env,ecl_object_t * obj)2475 static void ecl_program_dtor(ErlNifEnv* env, ecl_object_t* obj)
2476 {
2477 UNUSED(env);
2478 DBG("ecl_program_dtor: %p", obj);
2479 clReleaseProgram(obj->program);
2480 object_erase(obj);
2481 if (obj->parent) enif_release_resource(obj->parent);
2482 }
2483
ecl_kernel_dtor(ErlNifEnv * env,ecl_object_t * obj)2484 static void ecl_kernel_dtor(ErlNifEnv* env, ecl_object_t* obj)
2485 {
2486 ecl_kernel_t* kern = (ecl_kernel_t*) obj;
2487 cl_uint i;
2488 UNUSED(env);
2489 DBG("ecl_kernel_dtor: %p", kern);
2490 for (i = 0; i < kern->num_args; i++)
2491 unref_kernel_arg(kern->arg[i].type, kern->arg[i].value);
2492 enif_free(kern->arg);
2493 clReleaseKernel(kern->obj.kernel);
2494 object_erase(obj);
2495 if (obj->parent) enif_release_resource(obj->parent);
2496 }
2497
ecl_event_dtor(ErlNifEnv * env,ecl_object_t * obj)2498 static void ecl_event_dtor(ErlNifEnv* env, ecl_object_t* obj)
2499 {
2500 ecl_event_t* evt = (ecl_event_t*) obj;
2501 UNUSED(env);
2502 DBG("ecl_event_dtor: %p", evt);
2503 clReleaseEvent(evt->obj.event);
2504 object_erase(obj);
2505 if (evt->bin) {
2506 if (!evt->rl)
2507 enif_release_binary(evt->bin);
2508 enif_free(evt->bin);
2509 }
2510 if (evt->bin_env)
2511 enif_free_env(evt->bin_env);
2512 if (obj->parent) enif_release_resource(obj->parent);
2513 }
2514
ecl_context_dtor(ErlNifEnv * env,ecl_object_t * obj)2515 static void ecl_context_dtor(ErlNifEnv* env, ecl_object_t* obj)
2516 {
2517 void* exit_value;
2518 ecl_context_t* ctx = (ecl_context_t*) obj;
2519 ecl_context_t** pp;
2520 ecl_env_t* ecl = enif_priv_data(env);
2521 ecl_thread_t* thr = ctx->thr;
2522
2523 DBG("ecl_context_dtor: %p", ctx);
2524
2525 enif_rwlock_rwlock(ecl->context_list_lock);
2526 pp = &ecl->context_list;
2527 while(*pp != ctx)
2528 pp = &(*pp)->next;
2529 *pp = ctx->next;
2530 enif_rwlock_rwunlock(ecl->context_list_lock);
2531
2532 clReleaseContext(ctx->obj.context);
2533 object_erase(obj);
2534 // parent is always = 0
2535 // kill the event thread
2536 ecl_thread_stop(thr, &exit_value);
2537 }
2538
2539
2540 /******************************************************************************
2541 *
2542 * make/get
2543 *
2544 *****************************************************************************/
2545
2546 // For now, wrap the resource object {type,pointer-val,handle}
make_object(ErlNifEnv * env,const ERL_NIF_TERM type,void * robject)2547 static ERL_NIF_TERM make_object(ErlNifEnv* env, const ERL_NIF_TERM type,
2548 void* robject)
2549 {
2550 if (!robject)
2551 return ATOM(undefined);
2552 else
2553 return enif_make_tuple3(env,
2554 type,
2555 ecl_make_sizet(env, (size_t) robject),
2556 enif_make_resource(env, robject));
2557 }
2558
2559 // Accept {type,pointer-val,handle}
get_ecl_object(ErlNifEnv * env,const ERL_NIF_TERM term,ecl_resource_t * rtype,bool nullp,ecl_object_t ** robjectp)2560 static int get_ecl_object(ErlNifEnv* env, const ERL_NIF_TERM term,
2561 ecl_resource_t* rtype, bool nullp,
2562 ecl_object_t** robjectp)
2563 {
2564 const ERL_NIF_TERM* elem;
2565 int arity;
2566 size_t handle; // not really a size_t but the type has a good size
2567
2568 if (nullp && (term == ATOM(undefined))) {
2569 *robjectp = 0;
2570 return 1;
2571 }
2572 if (!enif_get_tuple(env, term, &arity, &elem))
2573 return 0;
2574 if (arity != 3)
2575 return 0;
2576 if (!enif_is_atom(env, elem[0]) || (elem[0] != rtype->type))
2577 return 0;
2578 if (!ecl_get_sizet(env, elem[1], &handle))
2579 return 0;
2580 if (!enif_get_resource(env, elem[2], rtype->res, (void**) robjectp))
2581 return 0;
2582 if ((size_t)*robjectp != handle)
2583 return 0;
2584 return 1;
2585 }
2586
2587 #if 0
2588 static int get_ecl_object_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2589 ecl_resource_t* rtype, bool nullp,
2590 ecl_object_t** robjv, size_t* rlen)
2591 {
2592 size_t maxlen = *rlen;
2593 size_t n = 0;
2594 ERL_NIF_TERM list = term;
2595
2596 while(n < maxlen) {
2597 ERL_NIF_TERM head, tail;
2598
2599 if (enif_get_list_cell(env, list, &head, &tail)) {
2600 if (!get_ecl_object(env, head, rtype, nullp, robjv))
2601 return 0;
2602 n++;
2603 robjv++;
2604 list = tail;
2605 }
2606 else if (enif_is_empty_list(env, list)) {
2607 *rlen = n;
2608 return 1;
2609 }
2610 else
2611 return 0;
2612 }
2613 return 0;
2614 }
2615 #endif
2616
get_object(ErlNifEnv * env,const ERL_NIF_TERM term,ecl_resource_t * rtype,bool nullp,void ** rptr)2617 static int get_object(ErlNifEnv* env, const ERL_NIF_TERM term,
2618 ecl_resource_t* rtype, bool nullp,
2619 void** rptr)
2620 {
2621 ecl_object_t* obj;
2622 if (get_ecl_object(env, term, rtype, nullp, &obj)) {
2623 *rptr = obj ? obj->opaque : 0;
2624 return 1;
2625 }
2626 return 0;
2627 }
2628
get_object_list(ErlNifEnv * env,const ERL_NIF_TERM term,ecl_resource_t * rtype,bool nullp,void ** robjv,cl_uint * rlen)2629 static int get_object_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2630 ecl_resource_t* rtype, bool nullp,
2631 void** robjv, cl_uint* rlen)
2632 {
2633 cl_uint maxlen = *rlen;
2634 cl_uint n = 0;
2635 ERL_NIF_TERM list = term;
2636
2637 while(n < maxlen) {
2638 ERL_NIF_TERM head, tail;
2639
2640 if (enif_get_list_cell(env, list, &head, &tail)) {
2641 if (!get_object(env, head, rtype, nullp, robjv))
2642 return 0;
2643 n++;
2644 robjv++;
2645 list = tail;
2646 }
2647 else if (enif_is_empty_list(env, list)) {
2648 *rlen = n;
2649 return 1;
2650 }
2651 else
2652 return 0;
2653 }
2654 return 0;
2655 }
2656
2657
2658
get_sizet_list(ErlNifEnv * env,const ERL_NIF_TERM term,size_t * rvec,size_t * rlen)2659 static int get_sizet_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2660 size_t* rvec, size_t* rlen)
2661 {
2662 size_t maxlen = *rlen;
2663 size_t n = 0;
2664 ERL_NIF_TERM list = term;
2665
2666 while(n < maxlen) {
2667 ERL_NIF_TERM head, tail;
2668
2669 if (enif_get_list_cell(env, list, &head, &tail)) {
2670 if (!ecl_get_sizet(env, head, rvec))
2671 return 0;
2672 n++;
2673 rvec++;
2674 list = tail;
2675 }
2676 else if (enif_is_empty_list(env, list)) {
2677 *rlen = n;
2678 return 1;
2679 }
2680 else
2681 return 0;
2682 }
2683 if (enif_is_empty_list(env, list)) {
2684 *rlen = n;
2685 return 1;
2686 }
2687 return 0;
2688 }
2689
get_binary_list(ErlNifEnv * env,const ERL_NIF_TERM term,ErlNifBinary * rvec,size_t * rlen)2690 static int get_binary_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2691 ErlNifBinary* rvec, size_t* rlen)
2692 {
2693 size_t maxlen = *rlen;
2694 size_t n = 0;
2695 ERL_NIF_TERM list = term;
2696
2697 while(n < maxlen) {
2698 ERL_NIF_TERM head, tail;
2699
2700 if (enif_get_list_cell(env, list, &head, &tail)) {
2701 if (!enif_inspect_binary(env, head, rvec))
2702 return 0;
2703 n++;
2704 rvec++;
2705 list = tail;
2706 }
2707 else if (enif_is_empty_list(env, list)) {
2708 *rlen = n;
2709 return 1;
2710 }
2711 else
2712 return 0;
2713 }
2714 return 0;
2715 }
2716
2717 #if CL_VERSION_1_2 == 1
2718 // avoid warning
2719 // currently onlt used my compile_program which is a 1.2 function
2720
2721 // free an array of strings
free_string_list(char ** rvec,size_t n)2722 static void free_string_list(char** rvec, size_t n)
2723 {
2724 int i;
2725 for (i = 0; i < (int)n; i++)
2726 enif_free(rvec[i]);
2727 }
2728
2729 // get a list of, max *rlen, dynamically allocated, strings.
get_string_list(ErlNifEnv * env,const ERL_NIF_TERM term,char ** rvec,size_t * rlen)2730 static int get_string_list(ErlNifEnv* env, const ERL_NIF_TERM term,
2731 char** rvec, size_t* rlen)
2732 {
2733 char** rvec0 = rvec;
2734 size_t maxlen = *rlen;
2735 size_t n = 0;
2736 ERL_NIF_TERM list = term;
2737 ERL_NIF_TERM head, tail;
2738 while((n < maxlen) &&
2739 enif_get_list_cell(env, list, &head, &tail)) {
2740 char* str;
2741 unsigned int len;
2742 if (!enif_get_list_length(env, head, &len))
2743 goto error;
2744 if (!(str = enif_alloc(len+1)))
2745 goto error;
2746 if (!enif_get_string(env, head, str, len+1, ERL_NIF_LATIN1))
2747 goto error;
2748 *rvec++ = str;
2749 n++;
2750 list = tail;
2751 }
2752 if (enif_is_empty_list(env, list)) {
2753 *rlen = n;
2754 return 1;
2755 }
2756 error:
2757 free_string_list(rvec0, rvec-rvec0);
2758 return 0;
2759 }
2760 #endif
2761
2762 // Copy a "local" binary to a new process independent environment
2763 // fill the binary structure with the new data and return it.
2764 //
ecl_make_binary(ErlNifEnv * src_env,const ERL_NIF_TERM src,ErlNifEnv * dst_env,ErlNifBinary * bin)2765 static int ecl_make_binary(ErlNifEnv* src_env,
2766 const ERL_NIF_TERM src,
2767 ErlNifEnv* dst_env,
2768 ErlNifBinary* bin)
2769 {
2770 ERL_NIF_TERM ref_counted;
2771
2772 if (enif_is_binary(src_env, src)) {
2773 // Update refc (and/or fix heap binaries)
2774 ref_counted = enif_make_copy(dst_env, src);
2775 return enif_inspect_binary(dst_env, ref_counted, bin);
2776 } else {
2777 // iolist to binary
2778 if (!enif_inspect_iolist_as_binary(src_env, src, bin))
2779 return 0;
2780 // ref count binary ?
2781 enif_make_binary(dst_env, bin);
2782 return 1;
2783 }
2784 }
2785
2786
2787 // Lookup a openCL object (native => reource ecl_object_t*)
ecl_lookup(ErlNifEnv * env,void * ptr)2788 static ecl_object_t* ecl_lookup(ErlNifEnv* env, void* ptr)
2789 {
2790 if (!ptr)
2791 return 0;
2792 else {
2793 ecl_env_t* ecl = enif_priv_data(env);
2794 ecl_object_t* obj;
2795
2796 enif_rwlock_rlock(ecl->ref_lock);
2797 obj = (ecl_object_t*) lhash_lookup(&ecl->ref,(void*)EPTR_HANDLE(ptr));
2798 enif_rwlock_runlock(ecl->ref_lock);
2799 return obj;
2800 }
2801 }
2802
2803 // Create a new openCL resource object
ecl_new(ErlNifEnv * env,ecl_resource_t * rtype,void * ptr,ecl_object_t * parent,cl_int version)2804 static ecl_object_t* ecl_new(ErlNifEnv* env, ecl_resource_t* rtype,
2805 void* ptr, ecl_object_t* parent, cl_int version)
2806 {
2807 if (!ptr)
2808 return 0;
2809 else {
2810 ecl_env_t* ecl = enif_priv_data(env);
2811 ecl_object_t* obj;
2812
2813 obj = enif_alloc_resource(rtype->res, rtype->size);
2814 if (obj) {
2815 if (parent) enif_keep_resource(parent);
2816 obj->opaque = ptr;
2817 obj->env = ecl;
2818 obj->parent = parent;
2819 if(version == -1) {
2820 version = parent ? parent->version : ecl->icd_version;
2821 }
2822 obj->version = (version < ecl->icd_version) ? version : ecl->icd_version;
2823 enif_rwlock_rwlock(ecl->ref_lock);
2824 lhash_insert_new(&ecl->ref, (void*)EPTR_HANDLE(ptr), obj);
2825 enif_rwlock_rwunlock(ecl->ref_lock);
2826 }
2827 return obj;
2828 }
2829 }
2830
ecl_make_object(ErlNifEnv * env,ecl_resource_t * rtype,void * ptr,ecl_object_t * parent)2831 static ERL_NIF_TERM ecl_make_object(ErlNifEnv* env, ecl_resource_t* rtype,
2832 void* ptr, ecl_object_t* parent)
2833 {
2834 ecl_object_t* obj = ecl_new(env,rtype,ptr,parent,-1);
2835 ERL_NIF_TERM res;
2836 res = make_object(env, rtype->type, obj);
2837 if (obj)
2838 enif_release_resource(obj);
2839 return res;
2840 }
2841
2842
2843 // lookup or create a new ecl_object_t resource
ecl_maybe_new(ErlNifEnv * env,ecl_resource_t * rtype,void * ptr,ecl_object_t * parent,bool * is_new)2844 static ecl_object_t* ecl_maybe_new(ErlNifEnv* env, ecl_resource_t* rtype,
2845 void* ptr, ecl_object_t* parent,
2846 bool* is_new)
2847 {
2848 ecl_object_t* obj = ecl_lookup(env, ptr);
2849 if (!obj) {
2850 obj = ecl_new(env, rtype, ptr, parent,-1);
2851 *is_new = true;
2852 }
2853 else
2854 *is_new = false;
2855 return obj;
2856 }
2857
2858
2859 // lookup or create resource object, return as erlang term
ecl_lookup_object(ErlNifEnv * env,ecl_resource_t * rtype,void * ptr,ecl_object_t * parent)2860 static ERL_NIF_TERM ecl_lookup_object(ErlNifEnv* env, ecl_resource_t* rtype,
2861 void* ptr, ecl_object_t* parent)
2862 {
2863 bool is_new;
2864 ERL_NIF_TERM res;
2865 ecl_object_t* obj = ecl_maybe_new(env,rtype,ptr,parent,&is_new);
2866
2867 res = make_object(env, rtype->type, obj);
2868 if (obj && is_new)
2869 enif_release_resource(obj);
2870 return res;
2871 }
2872
ecl_make_kernel(ErlNifEnv * env,cl_kernel kernel,ecl_object_t * parent)2873 static ERL_NIF_TERM ecl_make_kernel(ErlNifEnv* env, cl_kernel kernel,
2874 ecl_object_t* parent)
2875 {
2876 ecl_kernel_t* kern = (ecl_kernel_t*) ecl_new(env,&kernel_r,
2877 (void*)kernel,parent,-1);
2878 ERL_NIF_TERM res;
2879 cl_uint num_args;
2880 size_t sz;
2881
2882 // Get number of arguments, FIXME: check error return
2883 clGetKernelInfo(kernel,CL_KERNEL_NUM_ARGS,sizeof(num_args),&num_args,0);
2884 sz = num_args*sizeof(ecl_kernel_arg_t);
2885
2886 kern->arg = (ecl_kernel_arg_t*) enif_alloc(sz);
2887 memset(kern->arg, 0, sz);
2888 kern->num_args = num_args;
2889
2890 res = make_object(env, kernel_r.type, kern);
2891 if (kern)
2892 enif_release_resource(kern);
2893 return res;
2894 }
2895
ecl_make_event(ErlNifEnv * env,cl_event event,bool rd,bool rl,ErlNifEnv * bin_env,ErlNifBinary * bin,ecl_object_t * parent)2896 static ERL_NIF_TERM ecl_make_event(ErlNifEnv* env, cl_event event,
2897 bool rd, bool rl,
2898 ErlNifEnv* bin_env,
2899 ErlNifBinary* bin,
2900 ecl_object_t* parent)
2901 {
2902 ecl_event_t* evt = (ecl_event_t*) ecl_new(env,&event_r,
2903 (void*)event,parent,-1);
2904 ERL_NIF_TERM res;
2905 evt->bin_env = bin_env;
2906 evt->bin = bin;
2907 evt->rd = rd;
2908 evt->rl = rl;
2909 res = make_object(env, event_r.type, (ecl_object_t*) evt);
2910 if (evt)
2911 enif_release_resource(evt);
2912 return res;
2913 }
2914
ecl_make_context(ErlNifEnv * env,cl_context context,cl_int version)2915 static ERL_NIF_TERM ecl_make_context(ErlNifEnv* env, cl_context context, cl_int version)
2916 {
2917 ERL_NIF_TERM res;
2918 ecl_env_t* ecl;
2919 ecl_context_t* ctx = (ecl_context_t*) ecl_new(env,&context_r,
2920 (void*)context,0,version);
2921 ecl = ctx->obj.env;
2922 ctx->upgrade_count = 0; // first incarnation
2923 ctx->thr = ecl_thread_start(ecl_context_main, ctx, 8); // 8K stack!
2924 res = make_object(env, context_r.type, (ecl_object_t*) ctx);
2925 enif_rwlock_rwlock(ecl->context_list_lock);
2926 // link contexts for upgrade
2927 ctx->next = ecl->context_list;
2928 ecl->context_list = ctx;
2929 enif_rwlock_rwunlock(ecl->context_list_lock);
2930
2931 if (ctx)
2932 enif_release_resource(ctx);
2933 return res;
2934 }
2935
2936
2937 typedef cl_int CL_API_CALL info_fn_t(void* ptr, cl_uint param_name,
2938 size_t param_value_size,
2939 void* param_value, size_t* param_value_size_ret);
2940 typedef cl_int CL_API_CALL info2_fn_t(void* ptr1, void* ptr2, cl_uint param_name,
2941 size_t param_value_size,
2942 void* param_value, size_t* param_value_size_ret);
2943
2944 // return size of type
ecl_sizeof(ocl_type_t type)2945 static size_t ecl_sizeof(ocl_type_t type)
2946 {
2947 switch(type) {
2948 case OCL_CHAR: return sizeof(cl_char);
2949 case OCL_UCHAR: return sizeof(cl_uchar);
2950 case OCL_SHORT: return sizeof(cl_short);
2951 case OCL_USHORT: return sizeof(cl_ushort);
2952 case OCL_INT: return sizeof(cl_int);
2953 case OCL_UINT: return sizeof(cl_uint);
2954 case OCL_LONG: return sizeof(cl_long);
2955 case OCL_ULONG: return sizeof(cl_ulong);
2956 case OCL_HALF: return sizeof(cl_half);
2957 case OCL_FLOAT: return sizeof(cl_float);
2958 case OCL_DOUBLE: return sizeof(cl_double);
2959 case OCL_BOOL: return sizeof(cl_bool);
2960 case OCL_STRING: return sizeof(cl_char*);
2961 case OCL_ENUM: return sizeof(cl_int);
2962 case OCL_BITFIELD: return sizeof(cl_bitfield);
2963 case OCL_POINTER: return sizeof(void*);
2964 case OCL_SIZE: return sizeof(size_t);
2965 case OCL_PLATFORM: return sizeof(void*);
2966 case OCL_DEVICE: return sizeof(void*);
2967 case OCL_CONTEXT: return sizeof(void*);
2968 case OCL_PROGRAM: return sizeof(void*);
2969 case OCL_COMMAND_QUEUE: return sizeof(void*);
2970 case OCL_IMAGE_FORMAT: return sizeof(cl_image_format);
2971 #if CL_VERSION_1_2 == 1
2972 case OCL_DEVICE_PARTITION: return sizeof(cl_device_partition_property);
2973 #endif
2974 case OCL_NUM_TYPES:
2975 default:
2976 DBG("info_size: unknown type %d detected", type);
2977 return sizeof(cl_int);
2978 }
2979 }
2980
2981 // put basic value types
make_info_element(ErlNifEnv * env,ocl_type_t type,void * ptr,ecl_kv_t * kv)2982 static ERL_NIF_TERM make_info_element(ErlNifEnv* env, ocl_type_t type, void* ptr, ecl_kv_t* kv)
2983 {
2984 switch(type) {
2985 case OCL_CHAR: return enif_make_int(env, *((cl_char*)ptr));
2986 case OCL_SHORT: return enif_make_int(env, *((cl_short*)ptr));
2987 case OCL_INT: return enif_make_int(env, *((cl_int*)ptr));
2988 case OCL_LONG: return enif_make_int64(env, *((cl_long*)ptr));
2989 case OCL_UCHAR: return enif_make_uint(env, *((cl_uchar*)ptr));
2990 case OCL_USHORT: return enif_make_uint(env, *((cl_ushort*)ptr));
2991 case OCL_UINT: return enif_make_uint(env, *((cl_uint*)ptr));
2992 case OCL_HALF: return enif_make_uint(env, *((cl_half*)ptr));
2993 case OCL_ULONG: return enif_make_uint64(env, *((cl_ulong*)ptr));
2994 case OCL_SIZE: return ecl_make_sizet(env, *((size_t*)ptr));
2995 case OCL_FLOAT: return enif_make_double(env, *((cl_float*)ptr));
2996 case OCL_DOUBLE: return enif_make_double(env, *((cl_double*)ptr));
2997 case OCL_BOOL: return (*((cl_bool*)ptr)) ? ATOM(true) : ATOM(false);
2998 // case POINTER: cbuf_put_pointer(data, *((pointer_t*)ptr)); break;
2999 case OCL_STRING:
3000 return enif_make_string_len(env, (char*) ptr, strlen((char*) ptr), ERL_NIF_LATIN1);
3001
3002 case OCL_BITFIELD:
3003 return make_bitfields(env, *((cl_bitfield*)ptr), kv);
3004
3005 case OCL_ENUM:
3006 return make_enum(env, *((cl_int*)ptr), kv);
3007
3008 case OCL_POINTER:
3009 return ecl_make_sizet(env, *((intptr_t*)ptr));
3010
3011 case OCL_PLATFORM:
3012 return ecl_lookup_object(env,&platform_r,*(void**)ptr,0);
3013
3014 case OCL_DEVICE:
3015 return ecl_lookup_object(env,&device_r,*(void**)ptr,0);
3016
3017 case OCL_CONTEXT:
3018 return ecl_lookup_object(env,&context_r,*(void**)ptr,0);
3019
3020 case OCL_PROGRAM:
3021 // FIXME: find context object, pass as parent
3022 return ecl_lookup_object(env,&program_r,*(void**)ptr,0);
3023
3024 case OCL_COMMAND_QUEUE:
3025 // FIXME: find context object, pass as parent
3026 return ecl_lookup_object(env,&command_queue_r,*(void**)ptr,0);
3027
3028 case OCL_IMAGE_FORMAT: {
3029 cl_image_format* fmt = (cl_image_format*) ptr;
3030 ERL_NIF_TERM channel_order;
3031 ERL_NIF_TERM channel_type;
3032 channel_order = make_enum(env,fmt->image_channel_order,
3033 kv_channel_order);
3034 channel_type = make_enum(env,fmt->image_channel_data_type,
3035 kv_channel_type);
3036 return enif_make_tuple2(env, channel_order, channel_type);
3037 }
3038 #if CL_VERSION_1_2 == 1
3039 case OCL_DEVICE_PARTITION: { // cl_device_partition_property
3040 cl_device_partition_property* prop = (cl_device_partition_property*)ptr;
3041 ERL_NIF_TERM term = ATOM(undefined);
3042
3043 switch(*prop++) {
3044 case CL_DEVICE_PARTITION_EQUALLY:
3045 term = enif_make_uint(env, *prop);
3046 return enif_make_tuple2(env, ATOM(equally), term);
3047 case CL_DEVICE_PARTITION_BY_COUNTS: {
3048 cl_device_partition_property* pp = prop;
3049 term = enif_make_list(env, 0);
3050 while(*pp != CL_DEVICE_PARTITION_BY_COUNTS_LIST_END)
3051 pp++;
3052 if (pp > prop) { // build list backwards
3053 pp--;
3054 while(pp >= prop) {
3055 ERL_NIF_TERM ui = enif_make_uint(env, *pp);
3056 term = enif_make_list_cell(env, ui, term);
3057 pp--;
3058 }
3059 }
3060 return enif_make_tuple2(env, ATOM(by_counts), term);
3061 }
3062 case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN:
3063 switch(*prop) {
3064 case CL_DEVICE_AFFINITY_DOMAIN_NUMA:
3065 term = ATOM(numa); break;
3066 case CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE:
3067 term = ATOM(l4_cache); break;
3068 case CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE:
3069 term = ATOM(l3_cache); break;
3070 case CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE:
3071 term = ATOM(l2_cache); break;
3072 case CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE:
3073 term = ATOM(l1_cache); break;
3074 case CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE:
3075 term = ATOM(next_partitionable); break;
3076 default: return ATOM(undefined);
3077 }
3078 return enif_make_tuple2(env, ATOM(by_affinity_domain), term);
3079
3080 default:
3081 return ATOM(undefined);
3082 }
3083 break;
3084 }
3085 #endif
3086 case OCL_NUM_TYPES:
3087 default:
3088 return ATOM(undefined);
3089 }
3090 }
3091
3092
make_info_value(ErlNifEnv * env,ecl_info_t * iptr,void * buf,size_t buflen)3093 static ERL_NIF_TERM make_info_value(ErlNifEnv* env, ecl_info_t* iptr, void* buf, size_t buflen)
3094 {
3095 char* dptr = (char*) buf;
3096 ERL_NIF_TERM value;
3097
3098 if (iptr->is_array) { // arrays are return as lists of items
3099 ERL_NIF_TERM list = enif_make_list(env, 0);
3100 size_t elem_size = ecl_sizeof(iptr->info_type);
3101 size_t n = (buflen / elem_size);
3102 dptr += (n*elem_size); // run backwards!!!
3103 while (buflen >= elem_size) {
3104 dptr -= elem_size;
3105 value = make_info_element(env, iptr->info_type, dptr, iptr->extern_info);
3106 list = enif_make_list_cell(env, value, list);
3107 buflen -= elem_size;
3108 }
3109 value = list;
3110 }
3111 else {
3112 value = make_info_element(env, iptr->info_type, dptr, iptr->extern_info);
3113 }
3114 return value;
3115 }
3116
3117 // Find object value
3118 // return {ok,Value} | {error,Reason} | exception badarg
3119 //
make_object_info(ErlNifEnv * env,ERL_NIF_TERM key,ecl_object_t * obj,info_fn_t * func,ecl_info_t * info,size_t num_info)3120 ERL_NIF_TERM make_object_info(ErlNifEnv* env, ERL_NIF_TERM key, ecl_object_t* obj, info_fn_t* func,
3121 ecl_info_t* info, size_t num_info)
3122 {
3123 size_t returned_size = 0;
3124 size_t size = MAX_INFO_SIZE;
3125 unsigned char buf[MAX_INFO_SIZE];
3126 void* ptr = buf;
3127 ERL_NIF_TERM res;
3128 cl_int err;
3129 unsigned int i;
3130
3131 if (!enif_is_atom(env, key))
3132 return enif_make_badarg(env);
3133 i = 0;
3134 while((i < num_info) && (*info[i].info_key != key))
3135 i++;
3136 if (i == num_info)
3137 return enif_make_badarg(env); // or error ?
3138
3139 err = (*func)(obj->opaque,info[i].info_id,size,ptr,&returned_size);
3140 if (err == CL_INVALID_VALUE) {
3141 // try again allocate returned_size, returned_size does not
3142 // (yet) return the actual needed bytes (by spec)
3143 // but it looks like it... ;-)
3144 size = returned_size;
3145 if (!(ptr = enif_alloc(size)))
3146 return ecl_make_error(env, CL_OUT_OF_HOST_MEMORY);
3147 err = (*func)(obj->opaque,info[i].info_id,size,ptr,&returned_size);
3148 }
3149
3150 if (!err) {
3151 res = enif_make_tuple2(env, ATOM(ok),
3152 make_info_value(env,&info[i],ptr,returned_size));
3153 }
3154 else
3155 res = ecl_make_error(env, err);
3156 if (ptr != buf)
3157 enif_free(ptr);
3158 return res;
3159 }
3160
3161
make_object_info2(ErlNifEnv * env,ERL_NIF_TERM key,ecl_object_t * obj1,void * obj2,info2_fn_t * func,ecl_info_t * info,size_t num_info)3162 ERL_NIF_TERM make_object_info2(ErlNifEnv* env, ERL_NIF_TERM key, ecl_object_t* obj1, void* obj2,
3163 info2_fn_t* func, ecl_info_t* info, size_t num_info)
3164 {
3165 size_t returned_size = 0;
3166 cl_long *buf;
3167 cl_int err;
3168 unsigned int i;
3169 ERL_NIF_TERM result;
3170
3171 if (!enif_is_atom(env, key))
3172 return enif_make_badarg(env);
3173 i = 0;
3174 while((i < num_info) && (*info[i].info_key != key))
3175 i++;
3176 if (i == num_info)
3177 return enif_make_badarg(env); // or error ?
3178 returned_size = info[i].def_size;
3179 if (returned_size > 0 ||
3180 !(err = (*func)(obj1->opaque, obj2, info[i].info_id,
3181 0, NULL, &returned_size))) {
3182 if (!(buf = enif_alloc(returned_size)))
3183 return ecl_make_error(env, CL_OUT_OF_RESOURCES);
3184 if (!(err = (*func)(obj1->opaque, obj2, info[i].info_id,
3185 returned_size, buf, &returned_size))) {
3186 result = enif_make_tuple2(env, ATOM(ok), make_info_value(env, &info[i], buf, returned_size));
3187 enif_free(buf);
3188 return result;
3189 }
3190 }
3191 return ecl_make_error(env, err);
3192 }
3193
3194 /******************************************************************************
3195 *
3196 * main ecl event loop run as a thread.
3197 * The main purpose is to dispatch and send messages to owners
3198 *
3199 *****************************************************************************/
3200
ecl_context_main(void * arg)3201 static void* ecl_context_main(void* arg)
3202 {
3203 ecl_thread_t* self = arg;
3204 ecl_context_t* ctx = self->arg;
3205
3206 ctx->upgrade_count++; // signal that we have started/upgraded
3207
3208 DBG("ecl_context_main: started (addr=%p,tid=%p,count=%d)",
3209 &self, self->tid, ctx->upgrade_count);
3210
3211 while(1) {
3212 ecl_message_t m;
3213 ecl_message_recv(self, &m);
3214
3215 switch(m.type) {
3216 case ECL_MESSAGE_UPGRADE:
3217 DBG("ecl_context_main: %p got upgrade func=%p",
3218 self, m.upgrade);
3219 // upgrade must never return and SHOULD be tail recursive!
3220 return (m.upgrade)(arg);
3221
3222 case ECL_MESSAGE_SYNC:
3223 DBG("ecl_context_main: %p got sync", self);
3224 m.type = ECL_MESSAGE_SYNC_ACK;
3225 ecl_queue_put(&ctx->obj.env->q, &m);
3226 break;
3227
3228 case ECL_MESSAGE_SYNC_ACK:
3229 // Should not end up here
3230 DBG("ecl_context_main: sync ack received");
3231 break;
3232
3233 case ECL_MESSAGE_STOP: {
3234 DBG("ecl_context_main: stopped by command");
3235 if (m.env) {
3236 enif_send(0, &m.sender, m.env,
3237 enif_make_tuple3(m.env,
3238 ATOM(cl_async), m.ref,
3239 ATOM(ok)));
3240 enif_free_env(m.env);
3241 }
3242 ecl_thread_exit(self);
3243 break;
3244 }
3245
3246 case ECL_MESSAGE_FLUSH: { // flush message queue
3247 cl_int err;
3248
3249 DBG("ecl_context_main: flush q=%lu", (unsigned long) m.queue);
3250 err = ECL_CALL(clFlush)(m.queue->queue);
3251 // send {cl_async, Ref, ok | {error,Reason}}
3252 if (m.env) {
3253 ERL_NIF_TERM reply;
3254 int res;
3255 UNUSED(res);
3256 reply = !err ? ATOM(ok) : ecl_make_error(m.env, err);
3257 res = enif_send(0, &m.sender, m.env,
3258 enif_make_tuple3(m.env,
3259 ATOM(cl_async),
3260 m.ref,
3261 reply));
3262 DBG("ecl_context_main: send r=%d", res);
3263 enif_free_env(m.env);
3264 }
3265 enif_release_resource(m.queue);
3266 break;
3267 }
3268
3269 case ECL_MESSAGE_FINISH: { // finish message queue
3270 cl_int err;
3271 DBG("ecl_context_main: finish q=%lu", (unsigned long) m.queue);
3272 err = ECL_CALL(clFlush)(m.queue->queue);
3273 // send {cl_async, Ref, ok | {error,Reason}}
3274 if (m.env) {
3275 int res;
3276 ERL_NIF_TERM reply;
3277 UNUSED(res);
3278 reply = !err ? ATOM(ok) : ecl_make_error(m.env, err);
3279 res = enif_send(0, &m.sender, m.env,
3280 enif_make_tuple3(m.env,
3281 ATOM(cl_async), m.ref,
3282 reply));
3283 DBG("ecl_context_main: send r=%d", res);
3284 enif_free_env(m.env);
3285 }
3286 enif_release_resource(m.queue);
3287 break;
3288 }
3289
3290 case ECL_MESSAGE_WAIT_FOR_EVENT: { // wait for one event
3291 cl_int err;
3292 cl_event list[1];
3293 DBG("ecl_context_main: wait_for_event e=%lu",
3294 (unsigned long) m.event);
3295 list[0] = m.event->obj.event;
3296 err = ECL_CALL(clWaitForEvents)(1, list);
3297 DBG("ecl_context_main: wait_for_event err=%d", err);
3298 // reply to caller pid !
3299 if (m.env) {
3300 ERL_NIF_TERM reply;
3301 int res;
3302
3303 UNUSED(res);
3304 if (!err) {
3305 cl_int status;
3306 // read status COMPLETE | ERROR
3307 // FIXME: check error
3308 clGetEventInfo(m.event->obj.event,
3309 CL_EVENT_COMMAND_EXECUTION_STATUS,
3310 sizeof(status), &status, 0);
3311 switch(status) {
3312 case CL_COMPLETE:
3313 DBG("ecl_context_main: wait_for_event complete");
3314 if (m.event->bin && m.event->rd) {
3315 m.event->rl = true;
3316 reply = enif_make_binary(m.env, m.event->bin);
3317 }
3318 else
3319 reply = ATOM(complete);
3320 break;
3321 default:
3322 DBG("ecl_context_main: wait_for_event: status=%d",
3323 status);
3324 // must/should be an error
3325 reply = ecl_make_error(m.env, status);
3326 break;
3327 }
3328 }
3329 else
3330 reply = ecl_make_error(m.env, err);
3331 res = enif_send(0, &m.sender, m.env,
3332 enif_make_tuple3(m.env,
3333 ATOM(cl_event), m.ref,
3334 reply));
3335 DBG("ecl_context_main: send r=%d", res);
3336 enif_free_env(m.env);
3337 }
3338 enif_release_resource(m.event);
3339 break;
3340 }
3341 default:
3342 break;
3343 }
3344 }
3345 return 0;
3346 }
3347
3348
3349 //
3350 // API functions
3351 //
3352
3353 // noop - no operation for NIF interface performance benchmarking
ecl_noop(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3354 static ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc,
3355 const ERL_NIF_TERM argv[])
3356 {
3357 UNUSED(env);
3358 UNUSED(argc);
3359 UNUSED(argv);
3360 return ATOM(ok);
3361 }
3362
3363 // version - return list of API versions supported
ecl_versions(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3364 static ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc,
3365 const ERL_NIF_TERM argv[])
3366 {
3367 ERL_NIF_TERM list = enif_make_list(env, 0);
3368 ERL_NIF_TERM vsn;
3369 UNUSED(env);
3370 UNUSED(argc);
3371 UNUSED(argv);
3372
3373 #if CL_VERSION_1_0 == 1
3374 vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 0));
3375 list = enif_make_list_cell(env, vsn, list);
3376 #endif
3377 #if CL_VERSION_1_1 == 1
3378 vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 1));
3379 list = enif_make_list_cell(env, vsn, list);
3380 #endif
3381 #if CL_VERSION_1_2 == 1
3382 vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 2));
3383 list = enif_make_list_cell(env, vsn, list);
3384 #endif
3385 return list;
3386 }
3387
ecl_get_platform_ids(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3388 static ERL_NIF_TERM ecl_get_platform_ids(ErlNifEnv* env, int argc,
3389 const ERL_NIF_TERM argv[])
3390 {
3391 cl_uint num_platforms;
3392 cl_platform_id platform_id[MAX_PLATFORMS];
3393 ERL_NIF_TERM idv[MAX_PLATFORMS];
3394 ERL_NIF_TERM platform_list;
3395 cl_uint i;
3396 cl_int err;
3397 UNUSED(argc);
3398 UNUSED(argv);
3399
3400 if ((err = ECL_CALL(clGetPlatformIDs)(MAX_PLATFORMS, platform_id, &num_platforms)))
3401 return ecl_make_error(env, err);
3402
3403 for (i = 0; i < num_platforms; i++)
3404 idv[i] = ecl_lookup_object(env,&platform_r,platform_id[i],0);
3405
3406 platform_list = enif_make_list_from_array(env, idv,num_platforms);
3407 return enif_make_tuple2(env, ATOM(ok), platform_list);
3408 }
3409
ecl_get_platform_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3410 static ERL_NIF_TERM ecl_get_platform_info(ErlNifEnv* env, int argc,
3411 const ERL_NIF_TERM argv[])
3412 {
3413 ecl_object_t* o_platform;
3414 UNUSED(argc);
3415
3416 if (!get_ecl_object(env, argv[0], &platform_r, false, &o_platform))
3417 return enif_make_badarg(env);
3418 return make_object_info(env, argv[1], o_platform,
3419 (info_fn_t*) ECL_FUNC_PTR(clGetPlatformInfo),
3420 platform_info,
3421 sizeof_array(platform_info));
3422 }
3423
3424
ecl_get_device_ids(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3425 static ERL_NIF_TERM ecl_get_device_ids(ErlNifEnv* env, int argc,
3426 const ERL_NIF_TERM argv[])
3427 {
3428 cl_device_type device_type = 0;
3429 cl_device_id device_id[MAX_DEVICES];
3430 ERL_NIF_TERM idv[MAX_DEVICES];
3431 ERL_NIF_TERM device_list;
3432 cl_uint num_devices;
3433 cl_uint i;
3434 cl_platform_id platform;
3435 cl_int err;
3436 UNUSED(argc);
3437
3438 if (!get_object(env, argv[0], &platform_r, true,(void**)&platform))
3439 return enif_make_badarg(env);
3440 if (!get_bitfields(env, argv[1], &device_type, kv_device_type))
3441 return enif_make_badarg(env);
3442 if ((err = ECL_CALL(clGetDeviceIDs)(platform, device_type, MAX_DEVICES,
3443 device_id, &num_devices)))
3444 return ecl_make_error(env, err);
3445
3446 for (i = 0; i < num_devices; i++)
3447 idv[i] = ecl_lookup_object(env, &device_r, device_id[i], 0);
3448 device_list = enif_make_list_from_array(env, idv, num_devices);
3449 return enif_make_tuple2(env, ATOM(ok), device_list);
3450 }
3451
3452 #if CL_VERSION_1_2 == 1
3453
3454 // parse:
3455 // {equally,<unsigned int>} |
3456 // {by_counts, [<unsigned_int>]}
3457 // {by_affinity_domain, num|l4_cache|l3_cache|l2_cache|l1_cache|
3458 // next_partiionable}
3459 //
get_partition_properties(ErlNifEnv * env,const ERL_NIF_TERM term,cl_device_partition_property * rvec,size_t * rlen)3460 static int get_partition_properties(ErlNifEnv* env, const ERL_NIF_TERM term,
3461 cl_device_partition_property* rvec,
3462 size_t* rlen)
3463 {
3464 const ERL_NIF_TERM* elem;
3465 int arity;
3466 size_t maxlen = *rlen;
3467 size_t n = 0;
3468
3469 if (!enif_get_tuple(env, term, &arity, &elem))
3470 return 0;
3471 if (arity != 2)
3472 return 0;
3473 if (!enif_is_atom(env, elem[0]))
3474 return 0;
3475
3476 if (elem[0] == ATOM(equally)) {
3477 unsigned long v;
3478 *rvec++ = CL_DEVICE_PARTITION_EQUALLY;
3479 if (!enif_get_ulong(env, elem[1], &v))
3480 return 0;
3481 *rvec++ = v;
3482 n=2;
3483 }
3484 else if (elem[0] == ATOM(by_counts)) {
3485 ERL_NIF_TERM head, tail;
3486 ERL_NIF_TERM list = elem[1];
3487 unsigned long v;
3488 *rvec++ = CL_DEVICE_PARTITION_BY_COUNTS;
3489 n++;
3490 while((n < maxlen-1) &&
3491 enif_get_list_cell(env, list, &head, &tail)) {
3492 if (!enif_get_ulong(env, head, &v))
3493 return 0;
3494 *rvec++=v;
3495 n++;
3496 list = tail;
3497 }
3498 if (!enif_is_empty_list(env, list))
3499 return 0;
3500 *rvec++ = CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;
3501 n++;
3502 }
3503 else if (elem[0] == ATOM(by_affinity_domain)) {
3504 *rvec++ = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
3505 if (elem[1] == ATOM(numa))
3506 *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_NUMA;
3507 else if (elem[1] == ATOM(l4_cache))
3508 *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE;
3509 else if (elem[1] == ATOM(l3_cache))
3510 *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE;
3511 else if (elem[1] == ATOM(l2_cache))
3512 *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE;
3513 else if (elem[1] == ATOM(l1_cache))
3514 *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE;
3515 else if (elem[1] == ATOM(next_partitionable))
3516 *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;
3517 else
3518 return 0;
3519 n = 2;
3520 }
3521 else
3522 return 0;
3523 *rlen = n;
3524 return 1;
3525 }
3526
ecl_create_sub_devices(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3527 static ERL_NIF_TERM ecl_create_sub_devices(ErlNifEnv* env, int argc,
3528 const ERL_NIF_TERM argv[])
3529 {
3530 ecl_object_t* d;
3531 cl_device_id out_devices[MAX_DEVICES];
3532 ERL_NIF_TERM idv[MAX_DEVICES];
3533 ERL_NIF_TERM device_list;
3534 cl_uint num_devices;
3535 cl_uint i;
3536 cl_device_partition_property properties[128];
3537 size_t num_property = 128-1;
3538 cl_int err;
3539 UNUSED(argc);
3540
3541 // fixme calc length of properties !
3542 if (!get_ecl_object(env, argv[0], &device_r, false, &d))
3543 return enif_make_badarg(env);
3544 if (!get_partition_properties(env, argv[1], properties, &num_property))
3545 return enif_make_badarg(env);
3546 properties[num_property] = 0;
3547
3548 err = ECL_CALL(clCreateSubDevices)(d->device, properties, MAX_DEVICES,
3549 out_devices, &num_devices);
3550 if (err)
3551 return ecl_make_error(env, err);
3552 for (i = 0; i < num_devices; i++) {
3553 ecl_object_t* obj;
3554 if ((obj = ecl_lookup(env, out_devices[i])) == NULL)
3555 obj = ecl_new(env, &device_r, out_devices[i], 0, d->version);
3556 idv[i] = make_object(env, device_r.type, obj);
3557 }
3558 device_list = enif_make_list_from_array(env, idv, num_devices);
3559 return enif_make_tuple2(env, ATOM(ok), device_list);
3560 }
3561 #endif
3562
3563
ecl_get_device_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3564 static ERL_NIF_TERM ecl_get_device_info(ErlNifEnv* env, int argc,
3565 const ERL_NIF_TERM argv[])
3566 {
3567 ecl_object_t* o_device;
3568 UNUSED(argc);
3569
3570 if (!get_ecl_object(env, argv[0], &device_r, false, &o_device))
3571 return enif_make_badarg(env);
3572 return make_object_info(env, argv[1], o_device,
3573 (info_fn_t*) clGetDeviceInfo,
3574 device_info,
3575 sizeof_array(device_info));
3576 }
3577
3578 typedef struct {
3579 ErlNifPid sender; // sender pid
3580 ErlNifEnv* s_env; // senders message environment (ref, bin's etc)
3581 ErlNifEnv* r_env; // receiver message environment (ref, bin's etc)
3582 ErlNifTid tid; // Calling thread
3583 } ecl_notify_data_t;
3584
ecl_context_notify(const char * errinfo,const void * private_info,size_t cb,void * user_data)3585 void CL_CALLBACK ecl_context_notify(const char *errinfo,
3586 const void* private_info, size_t cb,
3587 void * user_data)
3588 {
3589 /* ecl_notify_data_t* bp = user_data; */
3590 /* ERL_NIF_TERM reply; */
3591 /* ErlNifEnv* s_env; */
3592 /* int res; */
3593 UNUSED(errinfo);
3594 UNUSED(private_info);
3595 UNUSED(cb);
3596 UNUSED(user_data);
3597
3598 DBG("ecl_context_notify: user_data=%p", user_data);
3599 DBG("ecl_context_notify: priv_info=%p cb=%d", private_info, cb);
3600 CL_ERROR("CL ERROR ASYNC: %s", errinfo);
3601 }
3602
3603 //
3604 // cl:create_context([cl_device_id()]) ->
3605 // {ok, cl_context()} | {error, cl_error()}
3606 //
ecl_create_context(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3607 static ERL_NIF_TERM ecl_create_context(ErlNifEnv* env, int argc,
3608 const ERL_NIF_TERM argv[])
3609 {
3610 cl_device_id device_list[MAX_DEVICES];
3611 cl_uint num_devices = MAX_DEVICES;
3612 cl_context context;
3613 cl_int err;
3614 ecl_notify_data_t* bp;
3615
3616 UNUSED(argc);
3617
3618 if (!get_object_list(env, argv[0], &device_r, false,
3619 (void**) device_list, &num_devices))
3620 return enif_make_badarg(env);
3621
3622 if (!(bp = enif_alloc(sizeof(ecl_notify_data_t))))
3623 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
3624
3625 if (!(bp->r_env = enif_alloc_env())) {
3626 enif_free(bp);
3627 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
3628 }
3629 (void) enif_self(env, &bp->sender);
3630 bp->s_env = env;
3631 bp->tid = enif_thread_self();
3632 DBG("ecl_create_context: self %p", bp->tid);
3633
3634 context = ECL_CALL(clCreateContext)(0, num_devices, device_list,
3635 ecl_context_notify,
3636 bp,
3637 &err);
3638 if (context) {
3639 cl_uint i;
3640 ERL_NIF_TERM t;
3641 ecl_object_t *dev;
3642 cl_int version = 100;
3643 for(i = 0; i < num_devices; i++) {
3644 dev = ecl_lookup(env, device_list[i]);
3645 /* Should hopefully be the same for all devices ?
3646 use the least version */
3647 if(dev->version < version)
3648 version = dev->version;
3649 }
3650 t = ecl_make_context(env, context, version);
3651 return enif_make_tuple2(env, ATOM(ok), t);
3652 }
3653 return ecl_make_error(env, err);
3654 }
3655
ecl_get_context_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3656 static ERL_NIF_TERM ecl_get_context_info(ErlNifEnv* env, int argc,
3657 const ERL_NIF_TERM argv[])
3658 {
3659 ecl_object_t* o_context;
3660 UNUSED(argc);
3661
3662 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
3663 return enif_make_badarg(env);
3664 return make_object_info(env, argv[1], o_context,
3665 (info_fn_t*) ECL_FUNC_PTR(clGetContextInfo),
3666 context_info,
3667 sizeof_array(context_info));
3668 }
3669
ecl_create_queue(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3670 static ERL_NIF_TERM ecl_create_queue(ErlNifEnv* env, int argc,
3671 const ERL_NIF_TERM argv[])
3672 {
3673 ecl_object_t* o_context;
3674 cl_device_id device;
3675 cl_command_queue_properties properties;
3676 cl_command_queue queue;
3677 cl_int err;
3678 UNUSED(argc);
3679
3680 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
3681 return enif_make_badarg(env);
3682 if (!get_object(env, argv[1], &device_r, false, (void**) &device))
3683 return enif_make_badarg(env);
3684 if (!get_bitfields(env, argv[2], &properties,
3685 kv_command_queue_properties))
3686 return enif_make_badarg(env);
3687 queue = ECL_CALL(clCreateCommandQueue)(o_context->context, device, properties,
3688 &err);
3689 if (queue) {
3690 ERL_NIF_TERM t;
3691 t = ecl_make_object(env, &command_queue_r,(void*) queue, o_context);
3692 return enif_make_tuple2(env, ATOM(ok), t);
3693 }
3694 return ecl_make_error(env, err);
3695 }
3696
ecl_get_queue_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3697 static ERL_NIF_TERM ecl_get_queue_info(ErlNifEnv* env, int argc,
3698 const ERL_NIF_TERM argv[])
3699 {
3700 ecl_object_t* o_queue;
3701 UNUSED(argc);
3702
3703 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
3704 return enif_make_badarg(env);
3705 return make_object_info(env, argv[1], o_queue,
3706 (info_fn_t*) ECL_FUNC_PTR(clGetCommandQueueInfo),
3707 queue_info,
3708 sizeof_array(queue_info));
3709 }
3710
3711
ecl_create_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3712 static ERL_NIF_TERM ecl_create_buffer(ErlNifEnv* env, int argc,
3713 const ERL_NIF_TERM argv[])
3714 {
3715 ecl_object_t* o_context;
3716 size_t size;
3717 cl_mem_flags mem_flags;
3718 cl_mem mem;
3719 ErlNifBinary bin;
3720 void* host_ptr = 0;
3721 cl_int err;
3722 UNUSED(argc);
3723
3724
3725 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
3726 return enif_make_badarg(env);
3727 if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
3728 return enif_make_badarg(env);
3729 if (!ecl_get_sizet(env, argv[2], &size))
3730 return enif_make_badarg(env);
3731 if (!enif_inspect_iolist_as_binary(env, argv[3], &bin))
3732 return enif_make_badarg(env);
3733 // How do we keep binary data (CL_MEM_USE_HOST_PTR)
3734 // We should probably make sure that the buffer is read_only in this
3735 // case!
3736 // we must be able to reference count the binary object!
3737 // USE enif_make_copy !!!! this copy is done to the thread environment!
3738 if (bin.size > 0) {
3739 host_ptr = bin.data;
3740 mem_flags |= CL_MEM_COPY_HOST_PTR;
3741 if (size < bin.size)
3742 size = bin.size;
3743 }
3744 else if (size)
3745 mem_flags |= CL_MEM_ALLOC_HOST_PTR;
3746
3747 mem = ECL_CALL(clCreateBuffer)(o_context->context, mem_flags, size,
3748 host_ptr, &err);
3749
3750 if (!err) {
3751 ERL_NIF_TERM t;
3752 t = ecl_make_object(env, &mem_r,(void*) mem, o_context);
3753 return enif_make_tuple2(env, ATOM(ok), t);
3754 }
3755 return ecl_make_error(env, err);
3756 }
3757
3758 #if CL_VERSION_1_1 == 1
ecl_create_sub_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3759 static ERL_NIF_TERM ecl_create_sub_buffer(ErlNifEnv* env, int argc,
3760 const ERL_NIF_TERM argv[])
3761 {
3762 ecl_object_t* o_buf;
3763 cl_mem_flags mem_flags;
3764 cl_mem mem;
3765 ERL_NIF_TERM info;
3766 ERL_NIF_TERM info_arg1, info_arg2;
3767 cl_buffer_region reg;
3768 cl_int err;
3769 UNUSED(argc);
3770
3771 if (!get_ecl_object(env, argv[0], &mem_r, false, &o_buf))
3772 return enif_make_badarg(env);
3773 if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
3774 return enif_make_badarg(env);
3775 if (!enif_is_atom(env, argv[2]) || (argv[2] != ATOM(region)))
3776 return enif_make_badarg(env);
3777 info = argv[3];
3778 if (!enif_is_list(env, info))
3779 return enif_make_badarg(env);
3780 enif_get_list_cell(env, info, &info_arg1, &info);
3781 if (!enif_is_list(env, info))
3782 return enif_make_badarg(env);
3783 enif_get_list_cell(env, info, &info_arg2, &info);
3784 if (!enif_is_empty_list(env, info))
3785 return enif_make_badarg(env);
3786 if (!ecl_get_sizet(env, info_arg1, ®.origin))
3787 return enif_make_badarg(env);
3788 if (!ecl_get_sizet(env, info_arg2, ®.size))
3789 return enif_make_badarg(env);
3790
3791 mem = ECL_CALL(clCreateSubBuffer)(o_buf->mem, mem_flags,
3792 CL_BUFFER_CREATE_TYPE_REGION,
3793 ®, &err);
3794 if (!err) {
3795 ERL_NIF_TERM t;
3796 t = ecl_make_object(env, &mem_r,(void*) mem, o_buf);
3797 return enif_make_tuple2(env, ATOM(ok), t);
3798 }
3799 return ecl_make_error(env, err);
3800 }
3801 #endif
3802 //
3803 // format {channel_order, channel_data_type} (old) |
3804 // {'cl_image_format', order, data_type }
3805 //
get_image_format(ErlNifEnv * env,ERL_NIF_TERM arg,cl_image_format * format)3806 static int get_image_format(ErlNifEnv* env, ERL_NIF_TERM arg,
3807 cl_image_format* format)
3808 {
3809 const ERL_NIF_TERM* rec;
3810 int i, arity;
3811
3812 if (!enif_get_tuple(env, arg, &arity, &rec))
3813 return 0;
3814 if (arity == 2)
3815 i = 0;
3816 else if (arity == 3) {
3817 i = 1;
3818 if (!enif_is_atom(env, rec[0]) || (rec[0] != ATOM(cl_image_format)))
3819 return 0;
3820 }
3821 else
3822 return 0;
3823
3824 if (!get_enum(env, rec[i], &format->image_channel_order,
3825 kv_channel_order))
3826 return 0;
3827 if (!get_enum(env, rec[i+1], &format->image_channel_data_type,
3828 kv_channel_type))
3829 return 0;
3830 return 1;
3831 }
3832
3833 //
3834 // format {'cl_image_desc',image_type,image_width,image_height,image_depth,
3835 // image_array_size,image_row_pitch,image_slice_pitch,
3836 // num_mip_levels,num_samples,buffer}
3837 //
get_image_desc(ErlNifEnv * env,ERL_NIF_TERM arg,cl_image_desc * desc)3838 static int get_image_desc(ErlNifEnv* env, ERL_NIF_TERM arg,
3839 cl_image_desc* desc)
3840 {
3841 const ERL_NIF_TERM* rec;
3842 int arity;
3843
3844 if (!enif_get_tuple(env, arg, &arity, &rec) || (arity != 11))
3845 return 0;
3846
3847 if (!enif_is_atom(env, rec[0]) || (rec[0] != ATOM(cl_image_desc)))
3848 return 0;
3849
3850 if (!get_enum(env, rec[1], &desc->image_type, kv_mem_object_type))
3851 return 0;
3852 if (!ecl_get_sizet(env, rec[2], &desc->image_width))
3853 return 0;
3854 if (!ecl_get_sizet(env, rec[3], &desc->image_height))
3855 return 0;
3856 if (!ecl_get_sizet(env, rec[4], &desc->image_depth))
3857 return 0;
3858 if (!ecl_get_sizet(env, rec[5], &desc->image_array_size))
3859 return 0;
3860 if (!ecl_get_sizet(env, rec[6], &desc->image_row_pitch))
3861 return 0;
3862 if (!ecl_get_sizet(env, rec[7], &desc->image_slice_pitch))
3863 return 0;
3864 desc->num_mip_levels = 0; // rec[8] according to spec
3865 desc->num_samples = 0; // rec[9] according to spec
3866 if (!get_object(env, rec[10], &mem_r, true, (void**)&desc->buffer))
3867 return 0;
3868 return 1;
3869 }
3870
3871 // 1.0, 1,1 wrapper where clCreateImage
e_clCreateImage(cl_context context,cl_mem_flags flags,const cl_image_format image_format,const cl_image_desc * image_desc,void * host_ptr,cl_int * errcode_ret)3872 cl_mem CL_CALLBACK e_clCreateImage(cl_context context,
3873 cl_mem_flags flags,
3874 const cl_image_format image_format,
3875 const cl_image_desc* image_desc,
3876 void* host_ptr,
3877 cl_int* errcode_ret)
3878 {
3879 UNUSED(context);
3880 UNUSED(flags);
3881 UNUSED(image_format);
3882 UNUSED(image_desc);
3883 UNUSED(host_ptr);
3884
3885 *errcode_ret = CL_INVALID_OPERATION;
3886 return NULL;
3887 }
3888
3889 // 1.2 -> 1.0 wrapper: clCreateImage2D using clCreateImage
eclCreateImage2D(cl_context context,cl_mem_flags mem_flags,const cl_image_format * format,size_t width,size_t height,size_t row_pitch,void * host_ptr,cl_int * err)3890 cl_mem CL_CALLBACK eclCreateImage2D(cl_context context,
3891 cl_mem_flags mem_flags,
3892 const cl_image_format * format,
3893 size_t width,
3894 size_t height,
3895 size_t row_pitch,
3896 void * host_ptr,
3897 cl_int *err)
3898 {
3899 cl_image_desc desc;
3900
3901 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
3902 desc.image_width = width;
3903 desc.image_height = height;
3904 desc.image_depth = 1; // used with IMAGE3D
3905 desc.image_array_size = 1; // used with IMAGE2D/3D_ARRAY?
3906 desc.image_row_pitch = row_pitch;
3907 desc.image_slice_pitch = 0; // maybe 0 for 2D image
3908 desc.num_mip_levels = 0; // must be 0
3909 desc.num_samples= 0; // must be 0
3910 desc.buffer = NULL; // used when CL_MEM_OBJECT_IMAGE1D_BUFFER
3911
3912 return ECL_CALL(clCreateImage)(context,
3913 mem_flags,
3914 format,
3915 &desc,
3916 host_ptr,
3917 err);
3918 }
3919
ecl_create_image2d(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])3920 static ERL_NIF_TERM ecl_create_image2d(ErlNifEnv* env, int argc,
3921 const ERL_NIF_TERM argv[])
3922 {
3923 ecl_object_t* o_context;
3924 size_t width;
3925 size_t height;
3926 size_t row_pitch;
3927 cl_image_format format;
3928 cl_mem_flags mem_flags;
3929 cl_mem mem;
3930 ErlNifBinary bin;
3931 void* host_ptr = 0;
3932 cl_int err;
3933 UNUSED(argc);
3934
3935 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
3936 return enif_make_badarg(env);
3937 if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
3938 return enif_make_badarg(env);
3939
3940 if (!get_image_format(env, argv[2], &format))
3941 return enif_make_badarg(env);
3942
3943 if (!ecl_get_sizet(env, argv[3], &width))
3944 return enif_make_badarg(env);
3945 if (!ecl_get_sizet(env, argv[4], &height))
3946 return enif_make_badarg(env);
3947 if (!ecl_get_sizet(env, argv[5], &row_pitch))
3948 return enif_make_badarg(env);
3949
3950 if (!enif_inspect_iolist_as_binary(env, argv[6], &bin))
3951 return enif_make_badarg(env);
3952 // How do we keep binary data (CL_MEM_USE_HOST_PTR) (read_only)
3953 // we must be able to reference count the binary object!
3954 if (bin.size > 0) {
3955 host_ptr = bin.data;
3956 mem_flags |= CL_MEM_COPY_HOST_PTR;
3957 }
3958 else if (width && height)
3959 mem_flags |= CL_MEM_ALLOC_HOST_PTR;
3960 mem = ECL_CALL(clCreateImage2D)(o_context->context, mem_flags, &format,
3961 width, height, row_pitch,
3962 host_ptr, &err);
3963 if (!err) {
3964 ERL_NIF_TERM t;
3965 t = ecl_make_object(env, &mem_r,(void*) mem, o_context);
3966 return enif_make_tuple2(env, ATOM(ok), t);
3967 }
3968 return ecl_make_error(env, err);
3969 }
3970
3971 // 1.2 -> 1.1 wrapper: clCreateImage3D using clCreateImage
eclCreateImage3D(cl_context context,cl_mem_flags mem_flags,const cl_image_format * format,size_t width,size_t height,size_t depth,size_t row_pitch,size_t slice_pitch,void * host_ptr,cl_int * err)3972 cl_mem CL_CALLBACK eclCreateImage3D(cl_context context,
3973 cl_mem_flags mem_flags,
3974 const cl_image_format* format,
3975 size_t width,
3976 size_t height,
3977 size_t depth,
3978 size_t row_pitch,
3979 size_t slice_pitch,
3980 void * host_ptr,
3981 cl_int *err)
3982 {
3983 cl_image_desc desc;
3984
3985 desc.image_type = CL_MEM_OBJECT_IMAGE3D;
3986 desc.image_width = width;
3987 desc.image_height = height;
3988 desc.image_depth = depth; // used with IMAGE3D
3989 desc.image_array_size = 1; // used with IMAGE2D/3D_ARRAY?
3990 desc.image_row_pitch = row_pitch;
3991 desc.image_slice_pitch = slice_pitch; // maybe 0 for 2D image
3992 desc.num_mip_levels = 0; // must be 0
3993 desc.num_samples= 0; // must be 0
3994 desc.buffer = NULL; // used when CL_MEM_OBJECT_IMAGE1D_BUFFER
3995
3996 return ECL_CALL(clCreateImage)(context,
3997 mem_flags,
3998 format,
3999 &desc,
4000 host_ptr,
4001 err);
4002 }
4003
4004
ecl_create_image3d(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4005 static ERL_NIF_TERM ecl_create_image3d(ErlNifEnv* env, int argc,
4006 const ERL_NIF_TERM argv[])
4007 {
4008 ecl_object_t* o_context;
4009 size_t width;
4010 size_t height;
4011 size_t depth;
4012 size_t row_pitch;
4013 size_t slice_pitch;
4014 cl_image_format format;
4015 cl_mem_flags mem_flags;
4016 cl_mem mem;
4017 ErlNifBinary bin;
4018 void* host_ptr = 0;
4019 cl_int err;
4020 UNUSED(argc);
4021
4022 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4023 return enif_make_badarg(env);
4024 if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
4025 return enif_make_badarg(env);
4026
4027 if (!get_image_format(env, argv[2], &format))
4028 return enif_make_badarg(env);
4029
4030 if (!ecl_get_sizet(env, argv[3], &width))
4031 return enif_make_badarg(env);
4032 if (!ecl_get_sizet(env, argv[4], &height))
4033 return enif_make_badarg(env);
4034 if (!ecl_get_sizet(env, argv[5], &depth))
4035 return enif_make_badarg(env);
4036 if (!ecl_get_sizet(env, argv[6], &row_pitch))
4037 return enif_make_badarg(env);
4038 if (!ecl_get_sizet(env, argv[7], &slice_pitch))
4039 return enif_make_badarg(env);
4040
4041 if (!enif_inspect_iolist_as_binary(env, argv[8], &bin))
4042 return enif_make_badarg(env);
4043 // How do we keep binary data (CL_MEM_USE_HOST_PTR) (read_only)
4044 // we must be able to reference count the binary object!
4045 if (bin.size > 0) {
4046 host_ptr = bin.data;
4047 mem_flags |= CL_MEM_COPY_HOST_PTR;
4048 }
4049 else if (width && height && depth)
4050 mem_flags |= CL_MEM_ALLOC_HOST_PTR;
4051 mem = ECL_CALL(clCreateImage3D)(o_context->context, mem_flags, &format,
4052 width, height, depth, row_pitch,
4053 slice_pitch,
4054 host_ptr, &err);
4055 if (mem) {
4056 ERL_NIF_TERM t;
4057 t = ecl_make_object(env, &mem_r,(void*) mem, o_context);
4058 return enif_make_tuple2(env, ATOM(ok), t);
4059 }
4060 return ecl_make_error(env, err);
4061 }
4062
4063 //
4064 // cl:create_image(Context, MemFlags, ImageFormat, ImageDesc, Data) ->
4065 //
4066 //
4067 #if CL_VERSION_1_2 == 1
ecl_create_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4068 static ERL_NIF_TERM ecl_create_image(ErlNifEnv* env, int argc,
4069 const ERL_NIF_TERM argv[])
4070 {
4071 ecl_object_t* o_context;
4072 cl_image_format format;
4073 cl_image_desc desc;
4074 cl_mem_flags mem_flags;
4075 cl_mem mem;
4076 ErlNifBinary bin;
4077 void* host_ptr = 0;
4078 cl_int err;
4079 UNUSED(argc);
4080
4081 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4082 return enif_make_badarg(env);
4083 if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))
4084 return enif_make_badarg(env);
4085
4086 if (!get_image_format(env, argv[2], &format))
4087 return enif_make_badarg(env);
4088
4089 if (!get_image_desc(env, argv[3], &desc))
4090 return enif_make_badarg(env);
4091
4092 if (!enif_inspect_iolist_as_binary(env, argv[4], &bin))
4093 return enif_make_badarg(env);
4094
4095 if (bin.size > 0) {
4096 host_ptr = bin.data;
4097 mem_flags |= CL_MEM_COPY_HOST_PTR;
4098 }
4099 else if (desc.image_width && desc.image_height && desc.image_depth)
4100 mem_flags |= CL_MEM_ALLOC_HOST_PTR;
4101
4102 mem = ECL_CALL(clCreateImage)(o_context->context,
4103 mem_flags,
4104 &format,
4105 &desc,
4106 host_ptr,
4107 &err);
4108 if (mem) {
4109 ERL_NIF_TERM t;
4110 t = ecl_make_object(env, &mem_r,(void*) mem, o_context);
4111 return enif_make_tuple2(env, ATOM(ok), t);
4112 }
4113 return ecl_make_error(env, err);
4114 }
4115 #endif
4116
ecl_get_supported_image_formats(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4117 static ERL_NIF_TERM ecl_get_supported_image_formats(ErlNifEnv* env, int argc,
4118 const ERL_NIF_TERM argv[])
4119 {
4120 cl_context context;
4121 cl_mem_flags flags;
4122 cl_mem_object_type image_type;
4123 cl_image_format image_format[MAX_IMAGE_FORMATS];
4124 cl_uint num_image_formats;
4125 cl_int err;
4126 UNUSED(argc);
4127
4128 if (!get_object(env, argv[0], &context_r, false, (void**) &context))
4129 return enif_make_badarg(env);
4130 if (!get_bitfields(env, argv[1], &flags, kv_mem_flags))
4131 return enif_make_badarg(env);
4132 if (!get_enum(env, argv[2], &image_type, kv_mem_object_type))
4133 return enif_make_badarg(env);
4134 err = ECL_CALL(clGetSupportedImageFormats)(context, flags, image_type,
4135 MAX_IMAGE_FORMATS,
4136 image_format,
4137 &num_image_formats);
4138 if (!err) {
4139 int i = (int) num_image_formats;
4140 ERL_NIF_TERM list = enif_make_list(env, 0);
4141
4142 while(i) {
4143 ERL_NIF_TERM channel_order, channel_type;
4144 ERL_NIF_TERM elem;
4145 i--;
4146 channel_order = make_enum(env,
4147 image_format[i].image_channel_order,
4148 kv_channel_order);
4149 channel_type = make_enum(env,
4150 image_format[i].image_channel_data_type,
4151 kv_channel_type);
4152 elem = enif_make_tuple2(env, channel_order, channel_type);
4153 list = enif_make_list_cell(env, elem, list);
4154 }
4155 return enif_make_tuple2(env, ATOM(ok), list);
4156 }
4157 return ecl_make_error(env, err);
4158 }
4159
4160
ecl_get_mem_object_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4161 static ERL_NIF_TERM ecl_get_mem_object_info(ErlNifEnv* env, int argc,
4162 const ERL_NIF_TERM argv[])
4163 {
4164 ecl_object_t* o_mem;
4165 UNUSED(argc);
4166
4167 if (!get_ecl_object(env, argv[0], &mem_r, false, &o_mem))
4168 return enif_make_badarg(env);
4169 return make_object_info(env, argv[1], o_mem,
4170 (info_fn_t*) ECL_FUNC_PTR(clGetMemObjectInfo),
4171 mem_info,
4172 sizeof_array(mem_info));
4173 }
4174
ecl_get_image_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4175 static ERL_NIF_TERM ecl_get_image_info(ErlNifEnv* env, int argc,
4176 const ERL_NIF_TERM argv[])
4177 {
4178 ecl_object_t* o_mem;
4179 UNUSED(argc);
4180
4181 if (!get_ecl_object(env, argv[0], &mem_r, false, &o_mem))
4182 return enif_make_badarg(env);
4183 return make_object_info(env, argv[1], o_mem,
4184 (info_fn_t*) ECL_FUNC_PTR(clGetImageInfo),
4185 image_info,
4186 sizeof_array(image_info));
4187 }
4188
4189 //
4190 // cl:create_sampler(Context::cl_context(),Normalized::boolean(),
4191 // AddressingMode::cl_addressing_mode(),
4192 // FilterMode::cl_filter_mode()) ->
4193 // {'ok', cl_sampler()} | {'error', cl_error()}.
4194 //
4195
ecl_create_sampler(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4196 static ERL_NIF_TERM ecl_create_sampler(ErlNifEnv* env, int argc,
4197 const ERL_NIF_TERM argv[])
4198 {
4199 ecl_object_t* o_context;
4200 cl_bool normalized_coords;
4201 cl_addressing_mode addressing_mode;
4202 cl_filter_mode filter_mode;
4203 cl_sampler sampler;
4204 cl_int err;
4205 UNUSED(argc);
4206
4207 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4208 return enif_make_badarg(env);
4209 if (!get_bool(env, argv[1], &normalized_coords))
4210 return enif_make_badarg(env);
4211 if (!get_enum(env, argv[2], &addressing_mode, kv_addressing_mode))
4212 return enif_make_badarg(env);
4213 if (!get_enum(env, argv[3], &filter_mode, kv_filter_mode))
4214 return enif_make_badarg(env);
4215
4216 sampler = ECL_CALL(clCreateSampler)(o_context->context,
4217 normalized_coords, addressing_mode, filter_mode,
4218 &err);
4219 if (!err) {
4220 ERL_NIF_TERM t;
4221 t = ecl_make_object(env, &sampler_r,(void*) sampler, o_context);
4222 return enif_make_tuple2(env, ATOM(ok), t);
4223 }
4224 return ecl_make_error(env, err);
4225 }
4226
4227
ecl_get_sampler_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4228 static ERL_NIF_TERM ecl_get_sampler_info(ErlNifEnv* env, int argc,
4229 const ERL_NIF_TERM argv[])
4230 {
4231 ecl_object_t* o_sampler;
4232 UNUSED(argc);
4233
4234 if (!get_ecl_object(env, argv[0], &sampler_r, false, &o_sampler))
4235 return enif_make_badarg(env);
4236 return make_object_info(env, argv[1], o_sampler,
4237 (info_fn_t*) ECL_FUNC_PTR(clGetSamplerInfo),
4238 sampler_info,
4239 sizeof_array(sampler_info));
4240 }
4241
4242 //
4243 // cl:create_program_with_source(Context::cl_context(), Source::iodata()) ->
4244 // {'ok', cl_program()} | {'error', cl_error()}
4245 //
ecl_create_program_with_source(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4246 static ERL_NIF_TERM ecl_create_program_with_source(ErlNifEnv* env, int argc,
4247 const ERL_NIF_TERM argv[])
4248 {
4249 ecl_object_t* o_context;
4250 cl_program program;
4251 ErlNifBinary source;
4252 char* strings[1];
4253 size_t lengths[1];
4254 cl_int err;
4255 UNUSED(argc);
4256
4257 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4258 return enif_make_badarg(env);
4259 if (!enif_inspect_iolist_as_binary(env, argv[1], &source))
4260 return enif_make_badarg(env);
4261 strings[0] = (char*) source.data;
4262 lengths[0] = source.size;
4263 program = ECL_CALL(clCreateProgramWithSource)(o_context->context,
4264 1,
4265 (const char**) strings,
4266 lengths,
4267 &err);
4268 if (!err) {
4269 ERL_NIF_TERM t;
4270 t = ecl_make_object(env, &program_r,(void*) program, o_context);
4271 return enif_make_tuple2(env, ATOM(ok), t);
4272 }
4273 return ecl_make_error(env, err);
4274 }
4275
4276 //
4277 // cl:create_program_with_binary(Context::cl_context(),
4278 // DeviceList::[cl_device_id()],
4279 // BinaryList::[binary()]) ->
4280 // {'ok', cl_program()} | {'error', cl_error()}
4281 //
ecl_create_program_with_binary(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4282 static ERL_NIF_TERM ecl_create_program_with_binary(ErlNifEnv* env, int argc,
4283 const ERL_NIF_TERM argv[])
4284 {
4285 ecl_object_t* o_context;
4286 cl_program program;
4287 cl_device_id device_list[MAX_DEVICES];
4288 cl_uint num_devices = MAX_DEVICES;
4289 ErlNifBinary binary_list[MAX_DEVICES];
4290 size_t num_binaries = MAX_DEVICES;
4291 size_t lengths[MAX_DEVICES];
4292 unsigned char* data[MAX_DEVICES];
4293 cl_uint i;
4294 cl_int status[MAX_DEVICES];
4295 cl_int err;
4296 UNUSED(argc);
4297
4298 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4299 return enif_make_badarg(env);
4300 if (!get_object_list(env, argv[1], &device_r, false,
4301 (void**) device_list, &num_devices))
4302 return enif_make_badarg(env);
4303 if (!get_binary_list(env, argv[2], binary_list, &num_binaries))
4304 return enif_make_badarg(env);
4305 if (num_binaries != num_devices)
4306 return enif_make_badarg(env);
4307
4308 for (i = 0; i < num_devices; i++) {
4309 lengths[i] = binary_list[i].size;
4310 data[i] = binary_list[i].data;
4311 }
4312 program = ECL_CALL(clCreateProgramWithBinary)(o_context->context,
4313 num_devices,
4314 (const cl_device_id*) device_list,
4315 (const size_t*) lengths,
4316 (const unsigned char**) data,
4317 status,
4318 &err);
4319 if (!err) {
4320 ERL_NIF_TERM t;
4321 t = ecl_make_object(env, &program_r,(void*) program, o_context);
4322 return enif_make_tuple2(env, ATOM(ok), t);
4323 }
4324 // FIXME: handle the value in the status array
4325 // In cases of error we can then detect which binary was corrupt...
4326 return ecl_make_error(env, err);
4327 }
4328
4329
4330 //
4331 // cl:create_program_with_builtin_kernels(Context::cl_context(),
4332 // DeviceList::[cl_device_id()],
4333 // KernelNames::string()) ->
4334 // {'ok', cl_program()} | {'error', cl_error()}
4335 //
4336 #if CL_VERSION_1_2 == 1
4337
ecl_create_program_with_builtin_kernels(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4338 static ERL_NIF_TERM ecl_create_program_with_builtin_kernels(
4339 ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
4340 {
4341 ecl_object_t* o_context;
4342 cl_program program;
4343 cl_device_id device_list[MAX_DEVICES];
4344 cl_uint num_devices = MAX_DEVICES;
4345 char kernel_names[MAX_KERNEL_NAME];
4346 cl_int err;
4347 UNUSED(argc);
4348
4349 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4350 return enif_make_badarg(env);
4351 if (!get_object_list(env, argv[1], &device_r, false,
4352 (void**) device_list, &num_devices))
4353 return enif_make_badarg(env);
4354 if (!enif_get_string(env, argv[2], kernel_names, sizeof(kernel_names),
4355 ERL_NIF_LATIN1))
4356 return enif_make_badarg(env);
4357
4358 program = ECL_CALL(clCreateProgramWithBuiltInKernels)(
4359 o_context->context,
4360 num_devices,
4361 (const cl_device_id*) device_list,
4362 kernel_names,
4363 &err);
4364 if (!err) {
4365 ERL_NIF_TERM t;
4366 t = ecl_make_object(env, &program_r,(void*) program, o_context);
4367 return enif_make_tuple2(env, ATOM(ok), t);
4368 }
4369 return ecl_make_error(env, err);
4370 }
4371 #endif
4372
4373 //
4374 // @spec async_build_program(Program::cl_program(),
4375 // DeviceList::[cl_device_id()],
4376 // Options::string()) ->
4377 // {'ok',Ref} | {'error', cl_error()}
4378 //
4379 //
4380 // Notification functio for clBuildProgram
4381 // Passed to main thread by sending a async response
4382 // FIXME: lock needed?
4383 //
4384 typedef struct {
4385 ErlNifPid sender; // sender pid
4386 ErlNifEnv* s_env; // senders message environment (ref, bin's etc)
4387 ErlNifEnv* r_env; // receiver message environment (ref, bin's etc)
4388 ErlNifTid tid; // Calling thread
4389 ERL_NIF_TERM ref; // ref (in env!)
4390 ecl_object_t* program;
4391 } ecl_build_data_t;
4392
ecl_build_notify(cl_program program,void * user_data)4393 void CL_CALLBACK ecl_build_notify(cl_program program, void* user_data)
4394 {
4395 ecl_build_data_t* bp = user_data;
4396 ERL_NIF_TERM reply;
4397 ErlNifEnv* s_env;
4398 int res;
4399 UNUSED(program);
4400 UNUSED(res);
4401
4402 DBG("ecl_build_notify: done program=%p, user_data=%p",
4403 program, user_data);
4404
4405 // FIXME: check all devices for build_status!
4406 // clGetProgramBuildInfo(bp->program->program, CL_PROGRAM_BUILD_STATUS,
4407
4408 // reply = !err ? ATOM(ok) : ecl_make_error(bp->env, err);
4409
4410 if(enif_equal_tids(bp->tid, enif_thread_self()))
4411 s_env = bp->s_env;
4412 else
4413 s_env = 0;
4414
4415 reply = ATOM(ok);
4416 res = enif_send(s_env, &bp->sender, bp->r_env,
4417 enif_make_tuple3(bp->r_env,
4418 ATOM(cl_async),
4419 bp->ref,
4420 reply));
4421 DBG("ecl_build_notify: send r=%d", res);
4422 enif_free_env(bp->r_env);
4423 if (bp->program)
4424 enif_release_resource(bp->program);
4425 enif_free(bp);
4426 }
4427
4428
ecl_async_build_program(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4429 static ERL_NIF_TERM ecl_async_build_program(ErlNifEnv* env, int argc,
4430 const ERL_NIF_TERM argv[])
4431 {
4432 ecl_object_t* o_program;
4433 cl_device_id device_list[MAX_DEVICES];
4434 cl_uint num_devices = MAX_DEVICES;
4435 char options[MAX_OPTION_LIST];
4436 ERL_NIF_TERM ref;
4437 ecl_build_data_t* bp;
4438 cl_int err;
4439 UNUSED(argc);
4440
4441 if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4442 return enif_make_badarg(env);
4443 if (!get_object_list(env, argv[1], &device_r, false,
4444 (void**) device_list, &num_devices))
4445 return enif_make_badarg(env);
4446 if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1))
4447 return enif_make_badarg(env);
4448 if (!(bp = enif_alloc(sizeof(ecl_build_data_t))))
4449 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
4450
4451 if (!(bp->r_env = enif_alloc_env())) {
4452 enif_free(bp);
4453 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
4454 }
4455 ref = enif_make_ref(env);
4456 (void) enif_self(env, &bp->sender);
4457 bp->ref = enif_make_copy(bp->r_env, ref);
4458 bp->program = o_program;
4459 bp->s_env = env;
4460 bp->tid = enif_thread_self();
4461 enif_keep_resource(o_program); // keep while operation is running
4462
4463 err = ECL_CALL(clBuildProgram)(o_program->program,
4464 num_devices,
4465 device_list,
4466 (const char*) options,
4467 ecl_build_notify,
4468 bp);
4469 DBG("ecl_async_build_program: err=%d user_data=%p", err, bp);
4470
4471 if ((err==CL_SUCCESS) ||
4472 // This should not be returned, it is not according to spec!!!!
4473 (err==CL_BUILD_PROGRAM_FAILURE))
4474 return enif_make_tuple2(env, ATOM(ok), ref);
4475 else {
4476 enif_free_env(bp->r_env);
4477 enif_release_resource(bp->program);
4478 enif_free(bp);
4479 return ecl_make_error(env, err);
4480 }
4481 }
4482
4483 #if CL_VERSION_1_2 == 1
ecl_unload_platform_compiler(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4484 static ERL_NIF_TERM ecl_unload_platform_compiler(ErlNifEnv* env, int argc,
4485 const ERL_NIF_TERM argv[])
4486 {
4487 cl_int err;
4488 cl_platform_id platform;
4489 ecl_env_t* ecl = enif_priv_data(env);
4490 UNUSED(argc);
4491
4492 if(ecl->icd_version < 12)
4493 return ecl_make_error(env, CL_INVALID_OPERATION);
4494 if (!get_object(env, argv[0], &platform_r, true,(void**)&platform))
4495 return enif_make_badarg(env);
4496 err = ECL_CALL(clUnloadPlatformCompiler)(platform);
4497 // err = eclUnloadPlatformCompiler(platform);
4498 if (err)
4499 return ecl_make_error(env, err);
4500 return ATOM(ok);
4501 }
4502 #endif
4503
4504 #if CL_VERSION_1_2 == 1
4505 // -spec compile_program(Program::cl_program(),
4506 // DeviceList::[cl_device_id()],
4507 // Options::string(),
4508 // Headers::[cl_program()],
4509 // Names::[string()]) ->
4510 // 'ok' | {'error', cl_error()}.
4511
4512 #define MAX_HEADERS 128
4513
ecl_async_compile_program(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4514 static ERL_NIF_TERM ecl_async_compile_program(ErlNifEnv* env, int argc,
4515 const ERL_NIF_TERM argv[])
4516 {
4517 ecl_object_t* o_program;
4518 cl_device_id device_list[MAX_DEVICES];
4519 cl_uint num_devices = MAX_DEVICES;
4520 char options[MAX_OPTION_LIST];
4521 cl_uint num_input_headers = MAX_HEADERS;
4522 cl_program input_headers[MAX_HEADERS];
4523 size_t num_header_include_names = MAX_HEADERS;
4524 char* header_include_names[MAX_HEADERS];
4525 ERL_NIF_TERM ref;
4526 ecl_build_data_t* bp = NULL;
4527 cl_int err;
4528 UNUSED(argc);
4529
4530 if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4531 return enif_make_badarg(env);
4532 if (!get_object_list(env, argv[1], &device_r, false,
4533 (void**) device_list, &num_devices))
4534 return enif_make_badarg(env);
4535 if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1))
4536 return enif_make_badarg(env);
4537 if (!get_object_list(env, argv[3], &program_r, false,
4538 (void**) input_headers, &num_input_headers))
4539 return enif_make_badarg(env);
4540 num_header_include_names = num_input_headers;
4541 if (!get_string_list(env, argv[4], header_include_names,
4542 &num_header_include_names))
4543 return enif_make_badarg(env);
4544
4545 if (!(bp = enif_alloc(sizeof(ecl_build_data_t)))) {
4546 err = CL_OUT_OF_RESOURCES;
4547 goto error;
4548 }
4549 if (!(bp->r_env = enif_alloc_env())) {
4550 err = CL_OUT_OF_RESOURCES;
4551 goto error;
4552 }
4553
4554 ref = enif_make_ref(env);
4555 (void) enif_self(env, &bp->sender);
4556 bp->ref = enif_make_copy(bp->r_env, ref);
4557 bp->program = o_program;
4558 bp->s_env = env;
4559 bp->tid = enif_thread_self();
4560 enif_keep_resource(o_program); // keep while operation is running
4561
4562 DBG("ecl_async_compile_program: program: %p, num_input_headers: %d, bp=%p",
4563 o_program->program, num_input_headers, bp);
4564
4565 err = ECL_CALL(clCompileProgram)(o_program->program,
4566 num_devices,
4567 device_list,
4568 (const char*) options,
4569 num_input_headers,
4570 num_input_headers ? input_headers : NULL,
4571 num_input_headers ?
4572 (const char**)header_include_names : NULL,
4573 ecl_build_notify,
4574 bp);
4575 DBG("ecl_async_compile_program: err=%d user_data=%p", err, bp);
4576
4577 if ((err==CL_SUCCESS) || (err==CL_BUILD_PROGRAM_FAILURE)) {
4578 // check if we need to save this until complete!
4579 free_string_list(header_include_names, num_header_include_names);
4580 return enif_make_tuple2(env, ATOM(ok), ref);
4581 }
4582
4583 error:
4584 free_string_list(header_include_names, num_header_include_names);
4585 if (bp) {
4586 if (bp->program) enif_release_resource(bp->program);
4587 if (bp->r_env) enif_free_env(bp->r_env);
4588 enif_free(bp);
4589 }
4590 return ecl_make_error(env, err);
4591 }
4592 #endif
4593
4594 #if CL_VERSION_1_2 == 1
4595 // -spec link_program(Context::cl_context(),
4596 // DeviceList::[cl_device_id()],
4597 // Options::string(),
4598 // Programs::[cl_program()]) ->
4599 // {'ok',cl_program()} | {'error', cl_error()}.
4600
4601 #define MAX_INPUT_PROGRAMS 128
4602
ecl_async_link_program(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4603 static ERL_NIF_TERM ecl_async_link_program(ErlNifEnv* env, int argc,
4604 const ERL_NIF_TERM argv[])
4605 {
4606 ecl_object_t* o_context;
4607 cl_program program;
4608 cl_device_id device_list[MAX_DEVICES];
4609 cl_uint num_devices = MAX_DEVICES;
4610 char options[MAX_OPTION_LIST];
4611 cl_uint num_input_programs = MAX_INPUT_PROGRAMS;
4612 cl_program input_programs[MAX_INPUT_PROGRAMS];
4613 ERL_NIF_TERM ref;
4614 ERL_NIF_TERM prog;
4615 ecl_build_data_t* bp;
4616 cl_int err;
4617 UNUSED(argc);
4618
4619 if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
4620 return enif_make_badarg(env);
4621 if (!get_object_list(env, argv[1], &device_r, false,
4622 (void**) device_list, &num_devices))
4623 return enif_make_badarg(env);
4624 if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1))
4625 return enif_make_badarg(env);
4626 if (!get_object_list(env, argv[3], &program_r, false,
4627 (void**) input_programs, &num_input_programs))
4628 return enif_make_badarg(env);
4629
4630 if (!(bp = enif_alloc(sizeof(ecl_build_data_t))))
4631 return ecl_make_error(env, CL_OUT_OF_RESOURCES);
4632 if (!(bp->r_env = enif_alloc_env())) {
4633 enif_free(bp);
4634 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
4635 }
4636
4637 ref = enif_make_ref(env);
4638 (void) enif_self(env, &bp->sender);
4639 bp->ref = enif_make_copy(bp->r_env, ref);
4640 bp->program = NULL;
4641 bp->s_env = env;
4642 bp->tid = enif_thread_self();
4643
4644 DBG("ecl_async_link_program: context: %p, num_input_programs %d, bp=%p",
4645 o_context->context, num_input_programs, bp);
4646
4647 // lock callback inorder avoid race?
4648 program = ECL_CALL(clLinkProgram)(o_context->context,
4649 num_devices,
4650 num_devices ? device_list : NULL,
4651 (const char*) options,
4652 num_input_programs,
4653 input_programs,
4654 ecl_build_notify,
4655 bp,
4656 &err);
4657 DBG("ecl_async_link_program: err=%d program %p, user_data=%p",
4658 err, program, bp);
4659
4660 if (program == NULL) {
4661 enif_free_env(bp->r_env);
4662 enif_free(bp);
4663 return ecl_make_error(env, err);
4664 }
4665 prog = ecl_make_object(env, &program_r,(void*) program, o_context);
4666 return enif_make_tuple2(env, ATOM(ok),
4667 enif_make_tuple2(env, ref, prog));
4668 }
4669
4670 #endif
4671
ecl_unload_compiler(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4672 static ERL_NIF_TERM ecl_unload_compiler(ErlNifEnv* env, int argc,
4673 const ERL_NIF_TERM argv[])
4674 {
4675 cl_int err;
4676 ecl_env_t* ecl = enif_priv_data(env);
4677
4678 UNUSED(argc);
4679 UNUSED(argv);
4680
4681 if (ecl->icd_version >= 12) {
4682 ecl_env_t* ecl = enif_priv_data(env);
4683 cl_platform_id platform;
4684 if (ecl->nplatforms <= 0)
4685 return ecl_make_error(env, CL_INVALID_VALUE);
4686 platform = (cl_platform_id) ecl->platform[0].o_platform->opaque;
4687 err = ECL_CALL(clUnloadPlatformCompiler)(platform);
4688 // err = eclUnloadPlatformCompiler(platform);
4689 } else {
4690 err = ECL_CALL(clUnloadCompiler)();
4691 }
4692 if (err)
4693 return ecl_make_error(env, err);
4694 return ATOM(ok);
4695 }
4696
4697 // Special (workaround) for checking if program may have binaries
program_may_have_binaries(cl_program program)4698 static int program_may_have_binaries(cl_program program)
4699 {
4700 cl_int num_devices;
4701 size_t returned_size;
4702 cl_device_id devices[MAX_DEVICES];
4703 int i;
4704
4705 if (ECL_CALL(clGetProgramInfo)
4706 (program,
4707 CL_PROGRAM_NUM_DEVICES,
4708 sizeof(num_devices),
4709 &num_devices,
4710 &returned_size) != CL_SUCCESS)
4711 return 0;
4712
4713 if (ECL_CALL(clGetProgramInfo)(program, CL_PROGRAM_DEVICES,
4714 num_devices*sizeof(cl_device_id),
4715 devices, NULL) != CL_SUCCESS)
4716 return 0;
4717
4718 for (i = 0; i < num_devices; i++) {
4719 cl_build_status build_status = CL_BUILD_NONE;
4720 if (ECL_CALL(clGetProgramBuildInfo)
4721 (program, devices[i], CL_PROGRAM_BUILD_STATUS,
4722 sizeof(build_status),
4723 &build_status, NULL) != CL_SUCCESS)
4724 return 0;
4725 if (build_status != CL_BUILD_SUCCESS) return 0;
4726 }
4727 return 1;
4728 }
4729
4730 // Special util to extract program binary_sizes
make_program_binary_sizes(ErlNifEnv * env,cl_program program)4731 static ERL_NIF_TERM make_program_binary_sizes(ErlNifEnv* env,
4732 cl_program program)
4733 {
4734 cl_int err;
4735 ERL_NIF_TERM list;
4736 size_t returned_size;
4737 cl_uint num_devices;
4738 size_t size[MAX_DEVICES];
4739 int i;
4740
4741 memset(size, 0, sizeof(size));
4742
4743 if ((err = ECL_CALL(clGetProgramInfo)
4744 (program,
4745 CL_PROGRAM_NUM_DEVICES,
4746 sizeof(num_devices),
4747 &num_devices,
4748 &returned_size)))
4749 return ecl_make_error(env, err);
4750
4751 if (program_may_have_binaries(program)) {
4752 if ((err = ECL_CALL(clGetProgramInfo)
4753 (program,
4754 CL_PROGRAM_BINARY_SIZES,
4755 num_devices*sizeof(size_t),
4756 &size[0],
4757 &returned_size)))
4758 return ecl_make_error(env, err);
4759 }
4760 list = enif_make_list(env, 0);
4761 for (i = num_devices-1; i >= 0; i--) {
4762 ERL_NIF_TERM elem = ecl_make_sizet(env, size[i]);
4763 list = enif_make_list_cell(env, elem, list);
4764 }
4765 return enif_make_tuple2(env, ATOM(ok), list);
4766 }
4767
4768
4769 // Special util to extract program binaries
make_program_binaries(ErlNifEnv * env,cl_program program)4770 static ERL_NIF_TERM make_program_binaries(ErlNifEnv* env, cl_program program)
4771 {
4772 cl_int err;
4773 ERL_NIF_TERM list;
4774 size_t returned_size;
4775 cl_uint num_devices;
4776 int i;
4777
4778 if ((err = ECL_CALL(clGetProgramInfo)
4779 (program,
4780 CL_PROGRAM_NUM_DEVICES,
4781 sizeof(num_devices),
4782 &num_devices,
4783 &returned_size)))
4784 return ecl_make_error(env, err);
4785
4786 if (!program_may_have_binaries(program)) {
4787 ErlNifBinary empty;
4788 enif_alloc_binary(0, &empty);
4789
4790 list = enif_make_list(env, 0);
4791 for (i = num_devices-1; i >= 0; i--) {
4792 ERL_NIF_TERM elem;
4793 elem = enif_make_binary(env, &empty);
4794 list = enif_make_list_cell(env, elem, list);
4795 }
4796 enif_release_binary(&empty);
4797 return enif_make_tuple2(env, ATOM(ok), list);
4798 }
4799 else {
4800 size_t size[MAX_DEVICES];
4801 ErlNifBinary binary[MAX_DEVICES];
4802 unsigned char* data[MAX_DEVICES];
4803
4804 memset(size, 0, sizeof(size));
4805 memset(binary, 0, sizeof(binary));
4806
4807 if ((err = ECL_CALL(clGetProgramInfo)
4808 (program,
4809 CL_PROGRAM_BINARY_SIZES,
4810 num_devices*sizeof(size_t),
4811 &size[0],
4812 &returned_size)))
4813 return ecl_make_error(env, err);
4814 i = 0;
4815 while (i < (int) num_devices) {
4816 if (!enif_alloc_binary(size[i], &binary[i])) {
4817 err = CL_OUT_OF_HOST_MEMORY;
4818 goto cleanup;
4819 }
4820 data[i] = binary[i].data;
4821 i++;
4822 }
4823 if ((err = ECL_CALL(clGetProgramInfo)
4824 (program,
4825 CL_PROGRAM_BINARIES,
4826 sizeof(unsigned char*)*num_devices,
4827 data,
4828 &returned_size)))
4829 goto cleanup;
4830
4831 list = enif_make_list(env, 0);
4832 for (i = num_devices-1; i >= 0; i--) {
4833 ERL_NIF_TERM elem = enif_make_binary(env, &binary[i]);
4834 list = enif_make_list_cell(env, elem, list);
4835 }
4836 return enif_make_tuple2(env, ATOM(ok), list);
4837
4838 cleanup:
4839 while(i > 0) {
4840 i--;
4841 enif_release_binary(&binary[i]);
4842 }
4843 return ecl_make_error(env, err);
4844 }
4845 }
4846
ecl_get_program_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4847 static ERL_NIF_TERM ecl_get_program_info(ErlNifEnv* env, int argc,
4848 const ERL_NIF_TERM argv[])
4849 {
4850 ecl_object_t* o_program;
4851 UNUSED(argc);
4852
4853 if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4854 return enif_make_badarg(env);
4855
4856 if (argv[1] == ATOM(binaries))
4857 return make_program_binaries(env, o_program->program);
4858 else if (argv[1] == ATOM(binary_sizes))
4859 return make_program_binary_sizes(env, o_program->program);
4860 else
4861 return make_object_info(env, argv[1], o_program,
4862 (info_fn_t*) ECL_FUNC_PTR(clGetProgramInfo),
4863 program_info,
4864 sizeof_array(program_info));
4865 }
4866
ecl_get_program_build_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4867 static ERL_NIF_TERM ecl_get_program_build_info(ErlNifEnv* env, int argc,
4868 const ERL_NIF_TERM argv[])
4869 {
4870 ecl_object_t* o_program;
4871 ecl_object_t* o_device;
4872 UNUSED(argc);
4873
4874 if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4875 return enif_make_badarg(env);
4876 if (!get_ecl_object(env, argv[1], &device_r, false, &o_device))
4877 return enif_make_badarg(env);
4878 return make_object_info2(env, argv[2], o_program, o_device->opaque,
4879 (info2_fn_t*) ECL_FUNC_PTR(clGetProgramBuildInfo),
4880 build_info,
4881 sizeof_array(build_info));
4882 }
4883
4884
ecl_create_kernel(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4885 static ERL_NIF_TERM ecl_create_kernel(ErlNifEnv* env, int argc,
4886 const ERL_NIF_TERM argv[])
4887 {
4888 ecl_object_t* o_program;
4889 cl_kernel kernel;
4890 char kernel_name[MAX_KERNEL_NAME];
4891 cl_int err;
4892 UNUSED(argc);
4893
4894 if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4895 return enif_make_badarg(env);
4896 if (!enif_get_string(env, argv[1], kernel_name, sizeof(kernel_name),
4897 ERL_NIF_LATIN1))
4898 return enif_make_badarg(env);
4899
4900 kernel = ECL_CALL(clCreateKernel)(o_program->program,kernel_name, &err);
4901 if (!err) {
4902 ERL_NIF_TERM t;
4903 t = ecl_make_kernel(env, kernel, o_program);
4904 return enif_make_tuple2(env, ATOM(ok), t);
4905 }
4906 return ecl_make_error(env, err);
4907 }
4908
4909
4910 //
4911 // @spec create_kernels_in_program(Program::cl_program()) ->
4912 // {'ok', [cl_kernel()]} | {'error', cl_error()}
4913 //
ecl_create_kernels_in_program(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4914 static ERL_NIF_TERM ecl_create_kernels_in_program(ErlNifEnv* env, int argc,
4915 const ERL_NIF_TERM argv[])
4916 {
4917 ecl_object_t* o_program;
4918 ERL_NIF_TERM kernv[MAX_KERNELS];
4919 ERL_NIF_TERM kernel_list;
4920 cl_kernel kernel[MAX_KERNELS];
4921 cl_uint num_kernels_ret;
4922 cl_uint i;
4923 cl_int err;
4924 UNUSED(argc);
4925
4926 if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))
4927 return enif_make_badarg(env);
4928
4929 err = ECL_CALL(clCreateKernelsInProgram)
4930 (o_program->program,
4931 MAX_KERNELS,
4932 kernel,
4933 &num_kernels_ret);
4934 if (err)
4935 return ecl_make_error(env, err);
4936 for (i = 0; i < num_kernels_ret; i++) {
4937 // FIXME: handle out of memory
4938 kernv[i] = ecl_make_kernel(env, kernel[i], o_program);
4939 }
4940 kernel_list = enif_make_list_from_array(env, kernv, num_kernels_ret);
4941 return enif_make_tuple2(env, ATOM(ok), kernel_list);
4942 }
4943
4944
4945 //
4946 // cl:set_kernel_arg(Kernel::cl_kernel(), Index::non_neg_integer(),
4947 // Argument::cl_kernel_arg()) ->
4948 // {Type,Value}
4949 // {'size',Value}
4950 // {ecl_object,Handle,<<Res>>} object (special for sampler)
4951 // integer() == {'int', Value}
4952 // float() == {'float', Value}
4953 // list == Raw data
4954 // binary == Raw data
4955 //
ecl_set_kernel_arg(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])4956 static ERL_NIF_TERM ecl_set_kernel_arg(ErlNifEnv* env, int argc,
4957 const ERL_NIF_TERM argv[])
4958 {
4959 ecl_kernel_t* o_kernel;
4960 unsigned char arg_buf[16*sizeof(double)]; // vector type buffer
4961 cl_uint arg_index;
4962 size_t arg_size;
4963 void* arg_value;
4964 const ERL_NIF_TERM* array;
4965 double fval;
4966 int ival;
4967 long lval;
4968 unsigned long luval;
4969 size_t sval;
4970 ErlNifUInt64 u64val;
4971 ErlNifSInt64 i64val;
4972 ErlNifBinary bval;
4973 cl_int int_arg;
4974 cl_float float_arg;
4975 void* ptr_arg = 0;
4976 int arity;
4977 cl_int err;
4978 int arg_type = KERNEL_ARG_OTHER;
4979 UNUSED(argc);
4980
4981 if (!get_ecl_object(env,argv[0],&kernel_r,false,(ecl_object_t**)&o_kernel))
4982 return enif_make_badarg(env);
4983 if (!enif_get_uint(env, argv[1], &arg_index))
4984 return enif_make_badarg(env);
4985 if (enif_get_tuple(env, argv[2], &arity, &array)) {
4986 if (arity == 3) {
4987 if (array[0] == ATOM(mem_t)) {
4988 if (!get_object(env,argv[2],&mem_r,true,&ptr_arg))
4989 return enif_make_badarg(env);
4990 arg_type = KERNEL_ARG_MEM;
4991 arg_value = &ptr_arg;
4992 arg_size = sizeof(cl_mem);
4993 goto do_kernel_arg;
4994 }
4995 else if (array[0] == ATOM(sampler_t)) {
4996 if (!get_object(env,argv[2],&sampler_r,false,&ptr_arg))
4997 return enif_make_badarg(env);
4998 arg_type = KERNEL_ARG_SAMPLER;
4999 arg_value = &ptr_arg;
5000 arg_size = sizeof(cl_sampler);
5001 goto do_kernel_arg;
5002 }
5003 return enif_make_badarg(env);
5004 }
5005 else if (arity == 2) {
5006 cl_uint typen;
5007 ocl_type_t base_type;
5008 size_t base_size;
5009 int vec_size;
5010 int value_arity;
5011 const ERL_NIF_TERM* values;
5012 unsigned char* ptr = arg_buf;
5013 int i;
5014
5015 if (!get_enum(env, array[0], &typen, kv_cl_type))
5016 return enif_make_badarg(env);
5017 vec_size = typen >> 16;
5018 base_type = typen & 0xFFFF;
5019 base_size = ecl_sizeof(base_type);
5020 if ((vec_size == 1) && !enif_is_tuple(env, array[1])) {
5021 value_arity = 1;
5022 values = &array[1];
5023 }
5024 else if (!enif_get_tuple(env, array[1], &value_arity, &values))
5025 return enif_make_badarg(env);
5026 if (value_arity != vec_size)
5027 return enif_make_badarg(env);
5028 for (i = 0; i < vec_size; i++) {
5029 switch(base_type) {
5030 case OCL_CHAR:
5031 if (!enif_get_long(env, values[i], &lval))
5032 return enif_make_badarg(env);
5033 *((cl_char*)ptr) = (cl_char) lval;
5034 break;
5035 case OCL_UCHAR:
5036 if (!enif_get_ulong(env, values[i], &luval))
5037 return enif_make_badarg(env);
5038 *((cl_uchar*)ptr) = (cl_uchar) luval;
5039 break;
5040 case OCL_SHORT:
5041 if (!enif_get_long(env, values[i], &lval))
5042 return enif_make_badarg(env);
5043 *((cl_short*)ptr) = (cl_short) lval;
5044 break;
5045 case OCL_USHORT:
5046 if (!enif_get_ulong(env, values[i], &luval))
5047 return enif_make_badarg(env);
5048 *((cl_ushort*)ptr) = (cl_ushort) luval;
5049 break;
5050 case OCL_INT:
5051 if (!enif_get_long(env, values[i], &lval))
5052 return enif_make_badarg(env);
5053 *((cl_int*)ptr) = (cl_int) lval;
5054 break;
5055 case OCL_UINT:
5056 if (!enif_get_ulong(env, values[i], &luval))
5057 return enif_make_badarg(env);
5058 *((cl_uint*)ptr) = (cl_uint) luval;
5059 break;
5060 case OCL_LONG:
5061 if (!enif_get_int64(env, values[i], &i64val))
5062 return enif_make_badarg(env);
5063 *((cl_long*)ptr) = i64val;
5064 break;
5065 case OCL_ULONG:
5066 if (!enif_get_uint64(env, values[i], &u64val))
5067 return enif_make_badarg(env);
5068 *((cl_ulong*)ptr) = u64val;
5069 break;
5070 case OCL_HALF:
5071 if (!enif_get_ulong(env, values[i], &luval))
5072 return enif_make_badarg(env);
5073 *((cl_half*)ptr) = (cl_half) luval;
5074 break;
5075 case OCL_FLOAT:
5076 if (!enif_get_double(env, values[i], &fval))
5077 return enif_make_badarg(env);
5078 *((cl_float*)ptr) = (cl_float) fval;
5079 break;
5080
5081 case OCL_DOUBLE:
5082 if (!enif_get_double(env, values[i], &fval))
5083 return enif_make_badarg(env);
5084 *((cl_double*)ptr) = fval;
5085 break;
5086 case OCL_SIZE:
5087 if (!ecl_get_sizet(env, values[i], &sval))
5088 return enif_make_badarg(env);
5089 *((size_t*)ptr) = sval;
5090 break;
5091 case OCL_BOOL:
5092 case OCL_STRING:
5093 case OCL_ENUM:
5094 case OCL_BITFIELD:
5095 case OCL_POINTER:
5096 case OCL_PLATFORM:
5097 case OCL_DEVICE:
5098 case OCL_CONTEXT:
5099 case OCL_PROGRAM:
5100 case OCL_COMMAND_QUEUE:
5101 case OCL_IMAGE_FORMAT:
5102 case OCL_DEVICE_PARTITION:
5103 case OCL_NUM_TYPES:
5104 default:
5105 return enif_make_badarg(env);
5106 }
5107 ptr += base_size;
5108 }
5109 arg_value = arg_buf;
5110 arg_size = base_size*vec_size;
5111 goto do_kernel_arg;
5112 }
5113 return enif_make_badarg(env);
5114 }
5115 else if (enif_get_int(env, argv[2], &ival)) {
5116 int_arg = ival;
5117 arg_value = &int_arg;
5118 arg_size = sizeof(int_arg);
5119 goto do_kernel_arg;
5120 }
5121 else if (enif_get_double(env, argv[2], &fval)) {
5122 float_arg = (float) fval;
5123 arg_value = &float_arg;
5124 arg_size = sizeof(float_arg);
5125 goto do_kernel_arg;
5126 }
5127 else if (enif_inspect_iolist_as_binary(env, argv[2], &bval)) {
5128 // rule your own case
5129 arg_value = bval.data;
5130 arg_size = bval.size;
5131 goto do_kernel_arg;
5132 }
5133 return enif_make_badarg(env);
5134
5135 do_kernel_arg:
5136 err = ECL_CALL(clSetKernelArg)
5137 (o_kernel->obj.kernel,
5138 arg_index,
5139 arg_size,
5140 arg_value);
5141 if (!err) {
5142 set_kernel_arg(o_kernel, arg_index, arg_type, ptr_arg);
5143 return ATOM(ok);
5144 }
5145 return ecl_make_error(env, err);
5146 }
5147
5148 // cl:set_kernel_arg_size(Kernel::cl_kernel(), Index::non_neg_integer(),
5149 // Size::non_neg_integer()) ->
5150 // 'ok' | {'error', cl_error()}
5151 //
5152 // cl special to set kernel arg with size only (local mem etc)
5153 //
ecl_set_kernel_arg_size(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5154 static ERL_NIF_TERM ecl_set_kernel_arg_size(ErlNifEnv* env, int argc,
5155 const ERL_NIF_TERM argv[])
5156 {
5157 ecl_kernel_t* o_kernel;
5158 cl_uint arg_index;
5159 size_t arg_size;
5160 unsigned char* arg_value = 0;
5161 cl_int err;
5162 UNUSED(argc);
5163
5164 if (!get_ecl_object(env,argv[0],&kernel_r,false,(ecl_object_t**)&o_kernel))
5165 return enif_make_badarg(env);
5166 if (!enif_get_uint(env, argv[1], &arg_index))
5167 return enif_make_badarg(env);
5168 if (!ecl_get_sizet(env, argv[2], &arg_size))
5169 return enif_make_badarg(env);
5170
5171 err = ECL_CALL(clSetKernelArg)
5172 (o_kernel->obj.kernel,
5173 arg_index,
5174 arg_size,
5175 arg_value);
5176 if (!err) {
5177 set_kernel_arg(o_kernel, arg_index, KERNEL_ARG_OTHER, (void*) 0);
5178 return ATOM(ok);
5179 }
5180 return ecl_make_error(env, err);
5181
5182 }
5183
ecl_get_kernel_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5184 static ERL_NIF_TERM ecl_get_kernel_info(ErlNifEnv* env, int argc,
5185 const ERL_NIF_TERM argv[])
5186 {
5187 ecl_object_t* o_kernel;
5188 UNUSED(argc);
5189
5190 if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel))
5191 return enif_make_badarg(env);
5192 return make_object_info(env, argv[1], o_kernel,
5193 (info_fn_t*) ECL_FUNC_PTR(clGetKernelInfo),
5194 kernel_info,
5195 sizeof_array(kernel_info));
5196 }
5197
ecl_get_kernel_workgroup_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5198 static ERL_NIF_TERM ecl_get_kernel_workgroup_info(ErlNifEnv* env, int argc,
5199 const ERL_NIF_TERM argv[])
5200 {
5201 ecl_object_t* o_kernel;
5202 ecl_object_t* o_device;
5203 UNUSED(argc);
5204
5205 if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel))
5206 return enif_make_badarg(env);
5207 if (!get_ecl_object(env, argv[1], &device_r, false, &o_device))
5208 return enif_make_badarg(env);
5209 return make_object_info2(env, argv[2], o_kernel, o_device->opaque,
5210 (info2_fn_t*) ECL_FUNC_PTR(clGetKernelWorkGroupInfo),
5211 workgroup_info,
5212 sizeof_array(workgroup_info));
5213 }
5214
5215 #if CL_VERSION_1_2 == 1
ecl_get_kernel_arg_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5216 static ERL_NIF_TERM ecl_get_kernel_arg_info(ErlNifEnv* env, int argc,
5217 const ERL_NIF_TERM argv[])
5218 {
5219 ecl_object_t* o_kernel;
5220 cl_uint arg_index;
5221 UNUSED(argc);
5222
5223 if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel))
5224 return enif_make_badarg(env);
5225 if (!enif_get_uint(env, argv[1], &arg_index))
5226 return enif_make_badarg(env);
5227 return make_object_info2(env, argv[2], o_kernel,
5228 (void*) (size_t) arg_index,
5229 (info2_fn_t*) ECL_FUNC_PTR(clGetKernelArgInfo),
5230 arg_info,
5231 sizeof_array(arg_info));
5232 }
5233 #endif
5234
5235 //
5236 // cl:enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(),
5237 // WaitList::[cl_event()], WantEvent::boolean()) ->
5238 // 'ok' | {'ok', cl_event()} | {'error', cl_error()}
5239 //
ecl_enqueue_task(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5240 static ERL_NIF_TERM ecl_enqueue_task(ErlNifEnv* env, int argc,
5241 const ERL_NIF_TERM argv[])
5242 {
5243 ecl_object_t* o_queue;
5244 cl_kernel kernel;
5245 cl_event wait_list[MAX_WAIT_LIST];
5246 cl_uint num_events = MAX_WAIT_LIST;
5247 cl_event event;
5248 cl_int err;
5249 cl_bool want_event;
5250 UNUSED(argc);
5251
5252 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5253 return enif_make_badarg(env);
5254 if (!get_object(env, argv[1], &kernel_r, false,(void**)&kernel))
5255 return enif_make_badarg(env);
5256 if (!get_object_list(env, argv[2], &event_r, false,
5257 (void**) wait_list, &num_events))
5258 return enif_make_badarg(env);
5259 if (!get_bool(env, argv[3], &want_event))
5260 return enif_make_badarg(env);
5261
5262 err = ECL_CALL(clEnqueueTask)
5263 (o_queue->queue,
5264 kernel,
5265 num_events,
5266 num_events ? wait_list : NULL,
5267 want_event ? &event : NULL);
5268 if (!err) {
5269 if (want_event) {
5270 ERL_NIF_TERM t;
5271 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5272 return enif_make_tuple2(env, ATOM(ok), t);
5273 }
5274 return ATOM(ok);
5275 }
5276 return ecl_make_error(env, err);
5277 }
5278 //
5279 // cl:enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(),
5280 // Global::[non_neg_integer()],
5281 // Local::[non_neg_integer()],
5282 // WaitList::[cl_event()], WantEvent::boolean()) ->
5283 // 'ok' | {'ok', cl_event()} | {'error', cl_error()}
5284 //
ecl_enqueue_nd_range_kernel(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5285 static ERL_NIF_TERM ecl_enqueue_nd_range_kernel(ErlNifEnv* env, int argc,
5286 const ERL_NIF_TERM argv[])
5287 {
5288 ecl_object_t* o_queue;
5289 cl_kernel kernel;
5290 cl_event wait_list[MAX_WAIT_LIST];
5291 cl_uint num_events = MAX_WAIT_LIST;
5292 size_t global_work_size[MAX_WORK_SIZE];
5293 size_t local_work_size[MAX_WORK_SIZE];
5294 size_t work_dim = MAX_WORK_SIZE;
5295 size_t temp_dim = MAX_WORK_SIZE;
5296 cl_event event;
5297 cl_int err;
5298 cl_bool want_event;
5299 UNUSED(argc);
5300
5301 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5302 return enif_make_badarg(env);
5303 if (!get_object(env, argv[1], &kernel_r, false, (void**) &kernel))
5304 return enif_make_badarg(env);
5305 if (!get_sizet_list(env, argv[2], global_work_size, &work_dim))
5306 return enif_make_badarg(env);
5307 if (!get_sizet_list(env, argv[3], local_work_size, &temp_dim))
5308 return enif_make_badarg(env);
5309 if (!get_object_list(env, argv[4], &event_r, false,
5310 (void**) wait_list, &num_events))
5311 return enif_make_badarg(env);
5312 if (!get_bool(env, argv[5], &want_event))
5313 return enif_make_badarg(env);
5314
5315 if (work_dim == 0) {
5316 return enif_make_badarg(env);
5317 }
5318
5319 if ((temp_dim > 0) && (work_dim != temp_dim)) {
5320 return enif_make_badarg(env);
5321 }
5322
5323 err = ECL_CALL(clEnqueueNDRangeKernel)
5324 (o_queue->queue, kernel,
5325 (cl_uint) work_dim,
5326 0, // global_work_offset,
5327 global_work_size,
5328 temp_dim ? local_work_size : NULL,
5329 num_events,
5330 num_events ? wait_list : NULL,
5331 want_event ? &event : NULL);
5332 if (!err) {
5333 if (want_event) {
5334 ERL_NIF_TERM t;
5335 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5336 return enif_make_tuple2(env, ATOM(ok), t);
5337 }
5338 return ATOM(ok);
5339 }
5340 return ecl_make_error(env, err);
5341 }
5342
5343 // 1.2 -> 1.1 wrapper: clEnqueueMarkerWithWaitList implement clEnqueueMarker
eclEnqueueMarker(cl_command_queue queue,cl_event * event)5344 cl_int CL_CALLBACK eclEnqueueMarker(cl_command_queue queue,
5345 cl_event * event)
5346 {
5347 return ECL_CALL(clEnqueueMarkerWithWaitList)(queue,0, NULL,event);
5348 }
5349
ecl_enqueue_marker(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5350 static ERL_NIF_TERM ecl_enqueue_marker(ErlNifEnv* env, int argc,
5351 const ERL_NIF_TERM argv[])
5352 {
5353 ecl_object_t* o_queue;
5354 cl_event event;
5355 cl_int err;
5356 ERL_NIF_TERM t;
5357 UNUSED(argc);
5358
5359 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5360 return enif_make_badarg(env);
5361 if (o_queue->version >= 12) {
5362 err = eclEnqueueMarker(o_queue->queue, &event);
5363 } else { // deprecated in 1.2 available in 1.1
5364 err = ECL_CALL(clEnqueueMarker)(o_queue->queue, &event);
5365 }
5366 if (!err) {
5367 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5368 return enif_make_tuple2(env, ATOM(ok), t);
5369 }
5370 return ecl_make_error(env, err);
5371 }
5372
5373 // 1.2 -> 1.1 wrapper: clEnqueueMarkerWithWaitList implement clEnqueueWaitForEvents
eclEnqueueWaitForEvents(cl_command_queue queue,cl_uint num_events,const cl_event * event_list)5374 cl_int CL_CALLBACK eclEnqueueWaitForEvents(cl_command_queue queue,
5375 cl_uint num_events,
5376 const cl_event * event_list)
5377 {
5378 return ECL_CALL(clEnqueueMarkerWithWaitList)(queue,
5379 num_events,
5380 num_events ? event_list : NULL,
5381 NULL);
5382 }
5383
5384 //
5385 // cl:enqueue_wait_for_events(Queue::cl_queue(), WaitList::[cl_event()]) ->
5386 // 'ok' | {'error', cl_error()}
5387 //
ecl_enqueue_wait_for_events(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5388 static ERL_NIF_TERM ecl_enqueue_wait_for_events(ErlNifEnv* env, int argc,
5389 const ERL_NIF_TERM argv[])
5390 {
5391 ecl_object_t* o_queue;
5392 cl_event wait_list[MAX_WAIT_LIST];
5393 cl_uint num_events = MAX_WAIT_LIST;
5394 cl_int err;
5395 UNUSED(argc);
5396
5397 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5398 return enif_make_badarg(env);
5399 if (!get_object_list(env, argv[1], &event_r, false,
5400 (void**) wait_list, &num_events))
5401 return enif_make_badarg(env);
5402 if(o_queue->version >= 12) {
5403 err = eclEnqueueWaitForEvents(o_queue->queue,
5404 num_events,
5405 num_events ? wait_list : NULL);
5406 } else {
5407 err = ECL_CALL(clEnqueueWaitForEvents)(o_queue->queue,
5408 num_events,
5409 num_events ? wait_list : NULL);
5410 }
5411 if (!err)
5412 return ATOM(ok);
5413 return ecl_make_error(env, err);
5414 }
5415 //
5416 // cl:enqueue_read_buffer(Queue::cl_queue(), Buffer::cl_mem(),
5417 // Offset::non_neg_integer(),
5418 // Size::non_neg_integer(),
5419 // WaitList::[cl_event()]) ->
5420 // {'ok', cl_event()} | {'error', cl_error()}
ecl_enqueue_read_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5421 static ERL_NIF_TERM ecl_enqueue_read_buffer(ErlNifEnv* env, int argc,
5422 const ERL_NIF_TERM argv[])
5423 {
5424 ecl_object_t* o_queue;
5425 cl_mem buffer;
5426 size_t offset;
5427 size_t size;
5428 cl_event wait_list[MAX_WAIT_LIST];
5429 cl_uint num_events = MAX_WAIT_LIST;
5430 cl_event event;
5431 ErlNifBinary* bin;
5432 cl_int err;
5433 UNUSED(argc);
5434
5435 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5436 return enif_make_badarg(env);
5437 if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5438 return enif_make_badarg(env);
5439 if (!ecl_get_sizet(env, argv[2], &offset))
5440 return enif_make_badarg(env);
5441 if (!ecl_get_sizet(env, argv[3], &size))
5442 return enif_make_badarg(env);
5443 if (!get_object_list(env, argv[4], &event_r, false,
5444 (void**) wait_list, &num_events))
5445 return enif_make_badarg(env);
5446 if (!(bin = enif_alloc(sizeof(ErlNifBinary))))
5447 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
5448 if (!enif_alloc_binary(size, bin)) {
5449 enif_free(bin);
5450 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
5451 }
5452 err = ECL_CALL(clEnqueueReadBuffer)
5453 (o_queue->queue, buffer,
5454 CL_FALSE,
5455 offset,
5456 size,
5457 bin->data,
5458 num_events,
5459 num_events ? wait_list : 0,
5460 &event);
5461 if (!err) {
5462 ERL_NIF_TERM t;
5463 t = ecl_make_event(env, event, true, false, 0, bin, o_queue);
5464 return enif_make_tuple2(env, ATOM(ok), t);
5465 }
5466 else {
5467 enif_free(bin);
5468 return ecl_make_error(env, err);
5469 }
5470 }
5471 //
5472 // cl:enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(),
5473 // Offset::non_neg_integer(),
5474 // Size::non_neg_integer(),
5475 // Data::binary(),
5476 // WaitList::[cl_event()],
5477 // WantEvent::boolean()
5478 // ) ->
5479 // {'ok', cl_event()} | {'error', cl_error()}
5480 //
ecl_enqueue_write_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5481 static ERL_NIF_TERM ecl_enqueue_write_buffer(ErlNifEnv* env, int argc,
5482 const ERL_NIF_TERM argv[])
5483 {
5484 ecl_object_t* o_queue;
5485 cl_mem buffer;
5486 size_t offset;
5487 size_t size;
5488 cl_event wait_list[MAX_WAIT_LIST];
5489 cl_uint num_events = MAX_WAIT_LIST;
5490 cl_event event;
5491 ErlNifBinary bin;
5492 ErlNifEnv* bin_env;
5493 cl_int err;
5494 cl_bool want_event;
5495 UNUSED(argc);
5496
5497 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5498 return enif_make_badarg(env);
5499 if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5500 return enif_make_badarg(env);
5501 if (!ecl_get_sizet(env, argv[2], &offset))
5502 return enif_make_badarg(env);
5503 if (!ecl_get_sizet(env, argv[3], &size))
5504 return enif_make_badarg(env);
5505 /* Check argv[4] (bin) last */
5506 if (!get_object_list(env, argv[5], &event_r, false,
5507 (void**) wait_list, &num_events))
5508 return enif_make_badarg(env);
5509 if (!get_bool(env, argv[6], &want_event))
5510 return enif_make_badarg(env);
5511
5512 if (!(bin_env = enif_alloc_env())) { // create binary environment
5513 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
5514 }
5515 if (!ecl_make_binary(env, argv[4], bin_env, &bin)) {
5516 enif_free_env(bin_env);
5517 return enif_make_badarg(env);
5518 }
5519
5520 // handle binary and iolist as binary
5521 if (bin.size < size) { // FIXME: handle offset!
5522 return enif_make_badarg(env);
5523 }
5524
5525 err = ECL_CALL(clEnqueueWriteBuffer)(o_queue->queue, buffer,
5526 !want_event, // FALSE for async
5527 offset,
5528 size,
5529 bin.data,
5530 num_events,
5531 num_events ? wait_list : NULL,
5532 want_event ? &event : NULL);
5533 if (!err) {
5534 if (want_event) {
5535 ERL_NIF_TERM t;
5536 t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue);
5537 return enif_make_tuple2(env, ATOM(ok), t);
5538 } else {
5539 enif_free_env(bin_env);
5540 }
5541 return ATOM(ok);
5542 }
5543 else {
5544 enif_free_env(bin_env);
5545 return ecl_make_error(env, err);
5546 }
5547 }
5548
5549 //
5550 // enqueue_read_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch,
5551 // _WaitList) -> {'ok',Event} | {error,Error}
5552 //
ecl_enqueue_read_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5553 static ERL_NIF_TERM ecl_enqueue_read_image(ErlNifEnv* env, int argc,
5554 const ERL_NIF_TERM argv[])
5555 {
5556 ecl_object_t* o_queue;
5557 cl_mem buffer;
5558 size_t origin[3];
5559 size_t region[3];
5560 size_t row_pitch;
5561 size_t slice_pitch;
5562 cl_event wait_list[MAX_WAIT_LIST];
5563 cl_uint num_events = MAX_WAIT_LIST;
5564 size_t num_origin = 3;
5565 size_t num_region = 3;
5566 size_t psize;
5567 size_t size;
5568 cl_event event;
5569 ErlNifBinary* bin;
5570 cl_int err;
5571 UNUSED(argc);
5572
5573 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5574 return enif_make_badarg(env);
5575 if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5576 return enif_make_badarg(env);
5577 origin[0] = origin[1] = origin[2] = 0;
5578 if (!get_sizet_list(env, argv[2], origin, &num_origin))
5579 return enif_make_badarg(env);
5580 region[0] = region[1] = region[2] = 1;
5581 if (!get_sizet_list(env, argv[3], region, &num_region))
5582 return enif_make_badarg(env);
5583 if (!ecl_get_sizet(env, argv[4], &row_pitch))
5584 return enif_make_badarg(env);
5585 if (!ecl_get_sizet(env, argv[5], &slice_pitch))
5586 return enif_make_badarg(env);
5587 if (!get_object_list(env, argv[6], &event_r, false,
5588 (void**) wait_list, &num_events))
5589 return enif_make_badarg(env);
5590 if (!(bin = enif_alloc(sizeof(ErlNifBinary))))
5591 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
5592
5593 // calculate the read size of the image, FIXME: check error return
5594 clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, sizeof(psize), &psize, 0);
5595 size = region[0]*region[1]*region[2]*psize;
5596 if (!enif_alloc_binary(size, bin)) {
5597 enif_free(bin);
5598 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
5599 }
5600 err = ECL_CALL(clEnqueueReadImage)(o_queue->queue, buffer,
5601 CL_FALSE,
5602 origin,
5603 region,
5604 row_pitch,
5605 slice_pitch,
5606 bin->data,
5607 num_events,
5608 num_events ? wait_list : 0,
5609 &event);
5610 if (!err) {
5611 ERL_NIF_TERM t;
5612 t = ecl_make_event(env, event, true, false, 0, bin, o_queue);
5613 return enif_make_tuple2(env, ATOM(ok), t);
5614 }
5615 else {
5616 enif_free(bin);
5617 return ecl_make_error(env, err);
5618 }
5619 }
5620
5621 //
5622 // enqueue_read_buffer_rect(_Queue, _Buffer,
5623 // BufferOrigin, HostOrigin, Region,
5624 // BufferRowPitch, BufferSlicePitch,
5625 // HostRowPitch, HostSlicePitch,
5626 // WaitList) -> {'ok',Event} | {error,Error}
5627 //
5628 #if CL_VERSION_1_1 == 1
ecl_enqueue_read_buffer_rect(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5629 static ERL_NIF_TERM ecl_enqueue_read_buffer_rect(ErlNifEnv* env, int argc,
5630 const ERL_NIF_TERM argv[])
5631 {
5632 ecl_object_t* o_queue;
5633 cl_mem buffer;
5634 size_t buffer_origin[3];
5635 size_t host_origin[3];
5636 size_t region[3];
5637 size_t buffer_row_pitch;
5638 size_t buffer_slice_pitch;
5639 size_t host_row_pitch;
5640 size_t host_slice_pitch;
5641 cl_event wait_list[MAX_WAIT_LIST];
5642 cl_uint num_events = MAX_WAIT_LIST;
5643 size_t num_buffer_origin = 3;
5644 size_t num_host_origin = 3;
5645 size_t num_region = 3;
5646 size_t size;
5647 cl_event event;
5648 ErlNifBinary* bin;
5649 cl_int err;
5650 UNUSED(argc);
5651
5652 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5653 return enif_make_badarg(env);
5654 if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5655 return enif_make_badarg(env);
5656 buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0;
5657 if (!get_sizet_list(env, argv[2], buffer_origin, &num_buffer_origin))
5658 return enif_make_badarg(env);
5659 host_origin[0] = host_origin[1] = host_origin[2] = 0;
5660 if (!get_sizet_list(env, argv[3], host_origin, &num_host_origin))
5661 return enif_make_badarg(env);
5662 region[0] = region[1] = region[2] = 1;
5663 if (!get_sizet_list(env, argv[4], region, &num_region))
5664 return enif_make_badarg(env);
5665 if (!ecl_get_sizet(env, argv[5], &buffer_row_pitch))
5666 return enif_make_badarg(env);
5667 if (!ecl_get_sizet(env, argv[6], &buffer_slice_pitch))
5668 return enif_make_badarg(env);
5669 if (!ecl_get_sizet(env, argv[7], &host_row_pitch))
5670 return enif_make_badarg(env);
5671 if (!ecl_get_sizet(env, argv[8], &host_slice_pitch))
5672 return enif_make_badarg(env);
5673 if (!get_object_list(env, argv[9], &event_r, false,
5674 (void**) wait_list, &num_events))
5675 return enif_make_badarg(env);
5676 if (!(bin = enif_alloc(sizeof(ErlNifBinary))))
5677 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
5678
5679 // calculate the read size of the image, FIXME: check error return
5680 size = (host_origin[0]+region[0])*(host_origin[1]+region[1])*
5681 (host_origin[2]+region[2]);
5682 if (!enif_alloc_binary(size, bin)) {
5683 enif_free(bin);
5684 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
5685 }
5686 err = ECL_CALL(clEnqueueReadBufferRect)(o_queue->queue, buffer,
5687 CL_FALSE,
5688 buffer_origin,
5689 host_origin,
5690 region,
5691 buffer_row_pitch,
5692 buffer_slice_pitch,
5693 host_row_pitch,
5694 host_slice_pitch,
5695 bin->data,
5696 num_events,
5697 num_events ? wait_list : 0,
5698 &event);
5699 if (!err) {
5700 ERL_NIF_TERM t;
5701 t = ecl_make_event(env, event, true, false, 0, bin, o_queue);
5702 return enif_make_tuple2(env, ATOM(ok), t);
5703 }
5704 else {
5705 enif_free(bin);
5706 return ecl_make_error(env, err);
5707 }
5708 }
5709 #endif
5710
5711
5712 //
5713 // enqueue_write_buffer_rect(_Queue, _Buffer,
5714 // BufferOrigin, HostOrigin, Region,
5715 // BufferRowPitch, BufferSlicePitch,
5716 // HostRowPitch, HostSlicePitch,
5717 // Data::binary(),
5718 // WaitList) -> {'ok',Event} | {error,Error}
5719 //
5720 #if CL_VERSION_1_1 == 1
ecl_enqueue_write_buffer_rect(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5721 static ERL_NIF_TERM ecl_enqueue_write_buffer_rect(ErlNifEnv* env, int argc,
5722 const ERL_NIF_TERM argv[])
5723 {
5724 ecl_object_t* o_queue;
5725 cl_mem buffer;
5726 size_t buffer_origin[3];
5727 size_t host_origin[3];
5728 size_t region[3];
5729 size_t buffer_row_pitch;
5730 size_t buffer_slice_pitch;
5731 size_t host_row_pitch;
5732 size_t host_slice_pitch;
5733 cl_event wait_list[MAX_WAIT_LIST];
5734 cl_uint num_events = MAX_WAIT_LIST;
5735 size_t num_buffer_origin = 3;
5736 size_t num_host_origin = 3;
5737 size_t num_region = 3;
5738 size_t size;
5739 cl_event event;
5740 ErlNifBinary bin;
5741 ErlNifEnv* bin_env;
5742 cl_int err;
5743 UNUSED(argc);
5744
5745 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5746 return enif_make_badarg(env);
5747 if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5748 return enif_make_badarg(env);
5749 buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0;
5750 if (!get_sizet_list(env, argv[2], buffer_origin, &num_buffer_origin))
5751 return enif_make_badarg(env);
5752 host_origin[0] = host_origin[1] = host_origin[2] = 0;
5753 if (!get_sizet_list(env, argv[3], host_origin, &num_host_origin))
5754 return enif_make_badarg(env);
5755 region[0] = region[1] = region[2] = 1;
5756 if (!get_sizet_list(env, argv[4], region, &num_region))
5757 return enif_make_badarg(env);
5758
5759 if (!ecl_get_sizet(env, argv[5], &buffer_row_pitch))
5760 return enif_make_badarg(env);
5761 if (!ecl_get_sizet(env, argv[6], &buffer_slice_pitch))
5762 return enif_make_badarg(env);
5763
5764 if (!ecl_get_sizet(env, argv[7], &host_row_pitch))
5765 return enif_make_badarg(env);
5766 if (!ecl_get_sizet(env, argv[8], &host_slice_pitch))
5767 return enif_make_badarg(env);
5768 /* Check argv[9] (bin) last */
5769 if (!get_object_list(env, argv[10], &event_r, false,
5770 (void**) wait_list, &num_events))
5771 return enif_make_badarg(env);
5772
5773 if (!(bin_env = enif_alloc_env())) { // create binary environment
5774 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
5775 }
5776 if (!ecl_make_binary(env, argv[9], bin_env, &bin)) {
5777 enif_free_env(bin_env);
5778 return enif_make_badarg(env);
5779 }
5780 size = (host_origin[0]+region[0])*(host_origin[1]+region[1])*
5781 (host_origin[2]+region[2]);
5782 // handle binary and iolist as binary
5783 if (bin.size < size) { // FIXME: handle offset!
5784 return enif_make_badarg(env);
5785 }
5786 err = ECL_CALL(clEnqueueWriteBufferRect)(o_queue->queue, buffer,
5787 CL_FALSE,
5788 buffer_origin,
5789 host_origin,
5790 region,
5791 buffer_row_pitch,
5792 buffer_slice_pitch,
5793 host_row_pitch,
5794 host_slice_pitch,
5795 bin.data,
5796 num_events,
5797 num_events ? wait_list : 0,
5798 &event);
5799 if (!err) {
5800 ERL_NIF_TERM t;
5801 t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue);
5802 return enif_make_tuple2(env, ATOM(ok), t);
5803 }
5804 else {
5805 enif_free_env(bin_env);
5806 return ecl_make_error(env, err);
5807 }
5808 }
5809 #endif
5810
5811 //
5812 // cl:enqueue_fill_buffer(Queue, Buffer, Pattern, Offset, Size, WaitList) ->
5813 // {ok,Event} | {error,Reason}
5814 //
5815 #if CL_VERSION_1_2 == 1
ecl_enqueue_fill_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5816 static ERL_NIF_TERM ecl_enqueue_fill_buffer(ErlNifEnv* env, int argc,
5817 const ERL_NIF_TERM argv[])
5818 {
5819 ecl_object_t* o_queue;
5820 cl_mem buffer;
5821 ErlNifBinary pattern;
5822 size_t offset;
5823 size_t size;
5824 cl_event wait_list[MAX_WAIT_LIST];
5825 cl_uint num_events = MAX_WAIT_LIST;
5826 cl_event event;
5827 cl_int err;
5828 UNUSED(argc);
5829
5830 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5831 return enif_make_badarg(env);
5832 if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5833 return enif_make_badarg(env);
5834 if (!enif_inspect_binary(env, argv[2], &pattern))
5835 return enif_make_badarg(env);
5836 if (!ecl_get_sizet(env, argv[3], &offset))
5837 return enif_make_badarg(env);
5838 if (!ecl_get_sizet(env, argv[4], &size))
5839 return enif_make_badarg(env);
5840 if (!get_object_list(env, argv[5], &event_r, false,
5841 (void**) wait_list, &num_events))
5842 return enif_make_badarg(env);
5843
5844 // Note: pattern must not be retained, it can be freed after this call
5845 // according to spec.
5846 err = ECL_CALL(clEnqueueFillBuffer)(o_queue->queue, buffer,
5847 pattern.data,
5848 pattern.size,
5849 offset,
5850 size,
5851 num_events,
5852 num_events ? wait_list : 0,
5853 &event);
5854 if (!err) {
5855 ERL_NIF_TERM t;
5856 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5857 return enif_make_tuple2(env, ATOM(ok), t);
5858 }
5859 return ecl_make_error(env, err);
5860 }
5861 #endif
5862
5863
5864 //
5865 // enqueue_write_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch,
5866 // _Data, _WaitList, _WantEvent) ->
5867 //
ecl_enqueue_write_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5868 static ERL_NIF_TERM ecl_enqueue_write_image(ErlNifEnv* env, int argc,
5869 const ERL_NIF_TERM argv[])
5870 {
5871 ecl_object_t* o_queue;
5872 cl_mem buffer;
5873 size_t origin[3];
5874 size_t region[3];
5875 size_t row_pitch;
5876 size_t slice_pitch;
5877 cl_event wait_list[MAX_WAIT_LIST];
5878 cl_uint num_events = MAX_WAIT_LIST;
5879 size_t num_origin = 3;
5880 size_t num_region = 3;
5881 size_t psize;
5882 size_t size;
5883 cl_event event;
5884 ErlNifBinary bin;
5885 ErlNifEnv* bin_env;
5886 cl_int err;
5887 cl_bool want_event;
5888 UNUSED(argc);
5889
5890 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5891 return enif_make_badarg(env);
5892 if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
5893 return enif_make_badarg(env);
5894 origin[0] = origin[1] = origin[2] = 0;
5895 if (!get_sizet_list(env, argv[2], origin, &num_origin))
5896 return enif_make_badarg(env);
5897 region[0] = region[1] = region[2] = 1;
5898 if (!get_sizet_list(env, argv[3], region, &num_region))
5899 return enif_make_badarg(env);
5900 if (!ecl_get_sizet(env, argv[4], &row_pitch))
5901 return enif_make_badarg(env);
5902 if (!ecl_get_sizet(env, argv[5], &slice_pitch))
5903 return enif_make_badarg(env);
5904 /* Check argv[6] (bin) last */
5905 if (!get_object_list(env, argv[7], &event_r, false,
5906 (void**) wait_list, &num_events))
5907 return enif_make_badarg(env);
5908 if (!get_bool(env, argv[8], &want_event))
5909 return enif_make_badarg(env);
5910 if (!(bin_env = enif_alloc_env())) { // create binary environment
5911 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
5912 }
5913 if (!ecl_make_binary(env, argv[6], bin_env, &bin)) {
5914 enif_free_env(bin_env);
5915 return enif_make_badarg(env);
5916 }
5917
5918 // calculate the read size of the image FIXME: check error return
5919 clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, sizeof(psize), &psize, 0);
5920 size = region[0]*region[1]*region[2]*psize;
5921 if (bin.size < size) {
5922 return enif_make_badarg(env);
5923 }
5924
5925 err = ECL_CALL(clEnqueueWriteImage)(o_queue->queue, buffer,
5926 !want_event, // FALSE for ASYNC
5927 origin,
5928 region,
5929 row_pitch,
5930 slice_pitch,
5931 bin.data,
5932 num_events,
5933 num_events ? wait_list : NULL,
5934 want_event ? &event : NULL );
5935 if (!err) {
5936 if (want_event) {
5937 ERL_NIF_TERM t;
5938 t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue);
5939 return enif_make_tuple2(env, ATOM(ok), t);
5940 } else {
5941 enif_free_env(bin_env);
5942 }
5943 return ATOM(ok);
5944 }
5945 else {
5946 enif_free_env(bin_env);
5947 return ecl_make_error(env, err);
5948 }
5949 }
5950
5951 //
5952 // cl:enqueue_copy_buffer(Queue, SrcBuffer, DstBuffer,
5953 // SrcOffset, DstOffset, Cb,
5954 // WaitList) ->
5955 //
ecl_enqueue_copy_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])5956 static ERL_NIF_TERM ecl_enqueue_copy_buffer(ErlNifEnv* env, int argc,
5957 const ERL_NIF_TERM argv[])
5958 {
5959 ecl_object_t* o_queue;
5960 cl_mem src_buffer;
5961 cl_mem dst_buffer;
5962 size_t src_offset;
5963 size_t dst_offset;
5964 size_t cb;
5965 cl_event wait_list[MAX_WAIT_LIST];
5966 cl_uint num_events = MAX_WAIT_LIST;
5967 cl_event event;
5968 cl_int err;
5969 UNUSED(argc);
5970
5971 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
5972 return enif_make_badarg(env);
5973 if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer))
5974 return enif_make_badarg(env);
5975 if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer))
5976 return enif_make_badarg(env);
5977 if (!ecl_get_sizet(env, argv[3], &src_offset))
5978 return enif_make_badarg(env);
5979 if (!ecl_get_sizet(env, argv[4], &dst_offset))
5980 return enif_make_badarg(env);
5981 if (!ecl_get_sizet(env, argv[5], &cb))
5982 return enif_make_badarg(env);
5983 if (!get_object_list(env, argv[6], &event_r, false,
5984 (void**) wait_list, &num_events))
5985 return enif_make_badarg(env);
5986 err = ECL_CALL(clEnqueueCopyBuffer)(o_queue->queue,
5987 src_buffer,
5988 dst_buffer,
5989 src_offset,
5990 dst_offset,
5991 cb,
5992 num_events,
5993 num_events ? wait_list : 0,
5994 &event);
5995 if (!err) {
5996 ERL_NIF_TERM t;
5997 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
5998 return enif_make_tuple2(env, ATOM(ok), t);
5999 }
6000 return ecl_make_error(env, err);
6001 }
6002
6003 //
6004 // enqueue_copy_buffer_rect(_Queue, _SrcBuffer, _DstBuffer,
6005 // SrcOrigin, DstOrigin, Region,
6006 // SrcRowPitch, SrcSlicePitch,
6007 // DstRowPitch, DstSlicePitch,
6008 // WaitList) -> {'ok',Event} | {error,Error}
6009 //
6010 #if CL_VERSION_1_1 == 1
ecl_enqueue_copy_buffer_rect(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6011 static ERL_NIF_TERM ecl_enqueue_copy_buffer_rect(ErlNifEnv* env, int argc,
6012 const ERL_NIF_TERM argv[])
6013 {
6014 ecl_object_t* o_queue;
6015 cl_mem src_buffer;
6016 cl_mem dst_buffer;
6017 size_t src_origin[3];
6018 size_t dst_origin[3];
6019 size_t region[3];
6020 size_t src_row_pitch;
6021 size_t src_slice_pitch;
6022 size_t dst_row_pitch;
6023 size_t dst_slice_pitch;
6024 cl_event wait_list[MAX_WAIT_LIST];
6025 cl_uint num_events = MAX_WAIT_LIST;
6026 size_t num_src_origin = 3;
6027 size_t num_dst_origin = 3;
6028 size_t num_region = 3;
6029 cl_event event;
6030 ErlNifBinary* bin;
6031 cl_int err;
6032 UNUSED(argc);
6033
6034 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6035 return enif_make_badarg(env);
6036 if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer))
6037 return enif_make_badarg(env);
6038 if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer))
6039 return enif_make_badarg(env);
6040 src_origin[0] = src_origin[1] = src_origin[2] = 0;
6041 if (!get_sizet_list(env, argv[3], src_origin, &num_src_origin))
6042 return enif_make_badarg(env);
6043 dst_origin[0] = dst_origin[1] = dst_origin[2] = 0;
6044 if (!get_sizet_list(env, argv[4], dst_origin, &num_dst_origin))
6045 return enif_make_badarg(env);
6046 region[0] = region[1] = region[2] = 1;
6047 if (!get_sizet_list(env, argv[5], region, &num_region))
6048 return enif_make_badarg(env);
6049
6050 if (!ecl_get_sizet(env, argv[6], &src_row_pitch))
6051 return enif_make_badarg(env);
6052 if (!ecl_get_sizet(env, argv[7], &src_slice_pitch))
6053 return enif_make_badarg(env);
6054
6055 if (!ecl_get_sizet(env, argv[8], &dst_row_pitch))
6056 return enif_make_badarg(env);
6057 if (!ecl_get_sizet(env, argv[9], &dst_slice_pitch))
6058 return enif_make_badarg(env);
6059
6060 if (!get_object_list(env, argv[10], &event_r, false,
6061 (void**) wait_list, &num_events))
6062 return enif_make_badarg(env);
6063 if (!(bin = enif_alloc(sizeof(ErlNifBinary))))
6064 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
6065
6066 err = ECL_CALL(clEnqueueCopyBufferRect)(o_queue->queue,
6067 src_buffer, dst_buffer,
6068 src_origin, dst_origin,
6069 region,
6070 src_row_pitch, src_slice_pitch,
6071 dst_row_pitch, dst_slice_pitch,
6072 num_events,
6073 num_events ? wait_list : 0,
6074 &event);
6075 if (!err) {
6076 ERL_NIF_TERM t;
6077 t = ecl_make_event(env, event, true, false, 0, bin, o_queue);
6078 return enif_make_tuple2(env, ATOM(ok), t);
6079 }
6080 else {
6081 enif_free(bin);
6082 return ecl_make_error(env, err);
6083 }
6084 }
6085 #endif
6086
ecl_enqueue_copy_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6087 static ERL_NIF_TERM ecl_enqueue_copy_image(ErlNifEnv* env, int argc,
6088 const ERL_NIF_TERM argv[])
6089 {
6090 ecl_object_t* o_queue;
6091 cl_mem src_image;
6092 cl_mem dst_image;
6093 size_t src_origin[3];
6094 size_t dst_origin[3];
6095 size_t region[3];
6096 cl_event wait_list[MAX_WAIT_LIST];
6097 cl_uint num_events = MAX_WAIT_LIST;
6098 size_t num_src_origin = 3;
6099 size_t num_dst_origin = 3;
6100 size_t num_region = 3;
6101 cl_event event;
6102 cl_int err;
6103 UNUSED(argc);
6104
6105 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6106 return enif_make_badarg(env);
6107 if (!get_object(env, argv[1], &mem_r, false, (void**)&src_image))
6108 return enif_make_badarg(env);
6109 if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_image))
6110 return enif_make_badarg(env);
6111 src_origin[0] = src_origin[1] = src_origin[2] = 0;
6112 if (!get_sizet_list(env, argv[3], src_origin, &num_src_origin))
6113 return enif_make_badarg(env);
6114 dst_origin[0] = dst_origin[1] = dst_origin[2] = 0;
6115 if (!get_sizet_list(env, argv[4], dst_origin, &num_dst_origin))
6116 return enif_make_badarg(env);
6117 region[0] = region[1] = region[2] = 1;
6118 if (!get_sizet_list(env, argv[5], region, &num_region))
6119 return enif_make_badarg(env);
6120 if (!get_object_list(env, argv[6], &event_r, false,
6121 (void**) wait_list, &num_events))
6122 return enif_make_badarg(env);
6123 err = ECL_CALL(clEnqueueCopyImage)(o_queue->queue, src_image, dst_image,
6124 src_origin,
6125 dst_origin,
6126 region,
6127 num_events,
6128 num_events ? wait_list : 0,
6129 &event);
6130 if (!err) {
6131 ERL_NIF_TERM t;
6132 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6133 return enif_make_tuple2(env, ATOM(ok), t);
6134 }
6135 return ecl_make_error(env, err);
6136 }
6137
6138 //
6139 // cl:enqueue_fill_image(Queue,Image,FillColor,Origin,Region,WaitList) ->
6140 // FillColor = <<R:32/unsigned,G:32/unsigned,B:32/unsigned,A:32/unsigned>>
6141 // | <<R:32/signed,G:32/signed,B:32/signed,A:32/signed>>
6142 // | <<R:32/float,G:32/float,B:32/float,A:32/float>>
6143 // Use device endian! check device_info(D, endian_little)
6144 //
6145 //
6146 #if CL_VERSION_1_2 == 1
6147
ecl_enqueue_fill_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6148 static ERL_NIF_TERM ecl_enqueue_fill_image(ErlNifEnv* env, int argc,
6149 const ERL_NIF_TERM argv[])
6150 {
6151 ecl_object_t* o_queue;
6152 cl_mem image;
6153 ErlNifBinary fill_color;
6154 size_t origin[3];
6155 size_t region[3];
6156 cl_event wait_list[MAX_WAIT_LIST];
6157 cl_uint num_events = MAX_WAIT_LIST;
6158 size_t num_origin = 3;
6159 size_t num_region = 3;
6160 cl_event event;
6161 cl_int err;
6162 UNUSED(argc);
6163
6164 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6165 return enif_make_badarg(env);
6166 if (!get_object(env, argv[1], &mem_r, false, (void**)&image))
6167 return enif_make_badarg(env);
6168 if (!enif_inspect_binary(env, argv[2], &fill_color))
6169 return enif_make_badarg(env);
6170 if (fill_color.size != 4*4)
6171 return enif_make_badarg(env);
6172 origin[0] = origin[1] = origin[2] = 0;
6173 if (!get_sizet_list(env, argv[3], origin, &num_origin))
6174 return enif_make_badarg(env);
6175 region[0] = region[1] = region[2] = 1;
6176 if (!get_sizet_list(env, argv[4], region, &num_region))
6177 return enif_make_badarg(env);
6178 if (!get_object_list(env, argv[5], &event_r, false,
6179 (void**) wait_list, &num_events))
6180 return enif_make_badarg(env);
6181
6182 err = ECL_CALL(clEnqueueFillImage)(o_queue->queue, image,
6183 fill_color.data, // validate size etc!
6184 origin,
6185 region,
6186 num_events,
6187 num_events ? wait_list : 0,
6188 &event);
6189 if (!err) {
6190 ERL_NIF_TERM t;
6191 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6192 return enif_make_tuple2(env, ATOM(ok), t);
6193 }
6194 return ecl_make_error(env, err);
6195 }
6196 #endif
6197
6198 // cl:enqueue_copy_image_to_buffer(_Queue, _SrcImage, _DstBuffer,
6199 // _Origin, _Region,
6200 // _DstOffset, _WaitList) ->
ecl_enqueue_copy_image_to_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6201 static ERL_NIF_TERM ecl_enqueue_copy_image_to_buffer(ErlNifEnv* env, int argc,
6202 const ERL_NIF_TERM argv[])
6203 {
6204 ecl_object_t* o_queue;
6205 cl_mem src_image;
6206 cl_mem dst_buffer;
6207 size_t origin[3];
6208 size_t region[3];
6209 size_t dst_offset;
6210 cl_event wait_list[MAX_WAIT_LIST];
6211 cl_uint num_events = MAX_WAIT_LIST;
6212 size_t num_src_origin = 3;
6213 size_t num_region = 3;
6214 cl_event event;
6215 cl_int err;
6216 UNUSED(argc);
6217
6218 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6219 return enif_make_badarg(env);
6220 if (!get_object(env, argv[1], &mem_r, false, (void**)&src_image))
6221 return enif_make_badarg(env);
6222 if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer))
6223 return enif_make_badarg(env);
6224 origin[0] = origin[1] = origin[2] = 0;
6225 if (!get_sizet_list(env, argv[3], origin, &num_src_origin))
6226 return enif_make_badarg(env);
6227 region[0] = region[1] = region[2] = 1;
6228 if (!get_sizet_list(env, argv[4], region, &num_region))
6229 return enif_make_badarg(env);
6230 if (!ecl_get_sizet(env, argv[5], &dst_offset))
6231 return enif_make_badarg(env);
6232 if (!get_object_list(env, argv[6], &event_r, false,
6233 (void**) wait_list, &num_events))
6234 return enif_make_badarg(env);
6235 err = ECL_CALL(clEnqueueCopyImageToBuffer)(o_queue->queue,
6236 src_image,
6237 dst_buffer,
6238 origin,
6239 region,
6240 dst_offset,
6241 num_events,
6242 num_events ? wait_list : 0,
6243 &event);
6244 if (!err) {
6245 ERL_NIF_TERM t;
6246 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6247 return enif_make_tuple2(env, ATOM(ok), t);
6248 }
6249 return ecl_make_error(env, err);
6250 }
6251 //
6252 // cl:enqueue_copy_buffer_to_image(_Queue, _SrcBuffer, _DstImage,
6253 // _SrcOffset, _DstOrigin,
6254 // _Region, _WaitList) ->
6255 //
ecl_enqueue_copy_buffer_to_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6256 static ERL_NIF_TERM ecl_enqueue_copy_buffer_to_image(ErlNifEnv* env, int argc,
6257 const ERL_NIF_TERM argv[])
6258 {
6259 ecl_object_t* o_queue;
6260 cl_mem src_buffer;
6261 cl_mem dst_image;
6262 size_t src_offset;
6263 size_t origin[3];
6264 size_t region[3];
6265 cl_event wait_list[MAX_WAIT_LIST];
6266 cl_uint num_events = MAX_WAIT_LIST;
6267 size_t num_src_origin = 3;
6268 size_t num_region = 3;
6269 cl_event event;
6270 cl_int err;
6271 UNUSED(argc);
6272
6273 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6274 return enif_make_badarg(env);
6275 if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer))
6276 return enif_make_badarg(env);
6277 if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_image))
6278 return enif_make_badarg(env);
6279 if (!ecl_get_sizet(env, argv[3], &src_offset))
6280 return enif_make_badarg(env);
6281 origin[0] = origin[1] = origin[2] = 0;
6282 if (!get_sizet_list(env, argv[4], origin, &num_src_origin))
6283 return enif_make_badarg(env);
6284 region[0] = region[1] = region[2] = 1;
6285 if (!get_sizet_list(env, argv[5], region, &num_region))
6286 return enif_make_badarg(env);
6287 if (!get_object_list(env, argv[6], &event_r, false,
6288 (void**) wait_list, &num_events))
6289 return enif_make_badarg(env);
6290 err = ECL_CALL(clEnqueueCopyBufferToImage)(o_queue->queue,
6291 src_buffer,
6292 dst_image,
6293 src_offset,
6294 origin,
6295 region,
6296 num_events,
6297 num_events ? wait_list : 0,
6298 &event);
6299 if (!err) {
6300 ERL_NIF_TERM t;
6301 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6302 return enif_make_tuple2(env, ATOM(ok), t);
6303 }
6304 return ecl_make_error(env, err);
6305 }
6306
ecl_enqueue_map_buffer(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6307 static ERL_NIF_TERM ecl_enqueue_map_buffer(ErlNifEnv* env, int argc,
6308 const ERL_NIF_TERM argv[])
6309 {
6310 ecl_object_t* o_queue;
6311 cl_mem buffer;
6312 cl_map_flags map_flags;
6313 size_t offset;
6314 size_t size;
6315 cl_event wait_list[MAX_WAIT_LIST];
6316 cl_uint num_events = MAX_WAIT_LIST;
6317 cl_event event;
6318 cl_int err;
6319 void* ptr;
6320 UNUSED(argc);
6321 UNUSED(ptr);
6322
6323 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6324 return enif_make_badarg(env);
6325 if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))
6326 return enif_make_badarg(env);
6327 if (!get_bitfields(env, argv[2], &map_flags, kv_map_flags))
6328 return enif_make_badarg(env);
6329 if (!ecl_get_sizet(env, argv[3], &offset))
6330 return enif_make_badarg(env);
6331 if (!ecl_get_sizet(env, argv[4], &size))
6332 return enif_make_badarg(env);
6333 if (!get_object_list(env, argv[5], &event_r, false,
6334 (void**) wait_list, &num_events))
6335 return enif_make_badarg(env);
6336
6337 ptr = clEnqueueMapBuffer(o_queue->queue,
6338 buffer,
6339 CL_FALSE,
6340 map_flags,
6341 offset,
6342 size,
6343 num_events,
6344 num_events ? wait_list : 0,
6345 &event,
6346 &err);
6347 if (!err) {
6348 ERL_NIF_TERM t;
6349 // FIXME: how should we handle ptr????
6350 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6351 return enif_make_tuple2(env, ATOM(ok), t);
6352 }
6353 return ecl_make_error(env, err);
6354 }
6355
6356 //
6357 // enqueue_map_image(_Queue, _Image, _MapFlags, _Origin, _Region, _WaitList) ->
6358 //
ecl_enqueue_map_image(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6359 static ERL_NIF_TERM ecl_enqueue_map_image(ErlNifEnv* env, int argc,
6360 const ERL_NIF_TERM argv[])
6361 {
6362 ecl_object_t* o_queue;
6363 cl_mem image;
6364 cl_map_flags map_flags;
6365 size_t origin[3];
6366 size_t region[3];
6367 size_t row_pitch;
6368 size_t slice_pitch;
6369 cl_event wait_list[MAX_WAIT_LIST];
6370 cl_uint num_events = MAX_WAIT_LIST;
6371 size_t num_origin = 3;
6372 size_t num_region = 3;
6373 cl_event event;
6374 cl_int err;
6375 void* ptr;
6376 UNUSED(argc);
6377 UNUSED(ptr);
6378
6379 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6380 return enif_make_badarg(env);
6381 if (!get_object(env, argv[1], &mem_r, false, (void**)&image))
6382 return enif_make_badarg(env);
6383 if (!get_bitfields(env, argv[2], &map_flags, kv_map_flags))
6384 return enif_make_badarg(env);
6385 origin[0] = origin[1] = origin[2] = 0;
6386 if (!get_sizet_list(env, argv[3], origin, &num_origin))
6387 return enif_make_badarg(env);
6388 region[0] = region[1] = region[2] = 1;
6389 if (!get_sizet_list(env, argv[4], region, &num_region))
6390 return enif_make_badarg(env);
6391 if (!get_object_list(env, argv[5], &event_r, false,
6392 (void**) wait_list, &num_events))
6393 return enif_make_badarg(env);
6394
6395 ptr = clEnqueueMapImage(o_queue->queue,
6396 image,
6397 CL_FALSE,
6398 map_flags,
6399 origin,
6400 region,
6401 &row_pitch,
6402 &slice_pitch,
6403 num_events,
6404 num_events ? wait_list : 0,
6405 &event,
6406 &err);
6407 if (!err) {
6408 ERL_NIF_TERM t;
6409 // FIXME: send binary+event to event thread
6410 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6411 return enif_make_tuple2(env, ATOM(ok), t);
6412 }
6413 return ecl_make_error(env, err);
6414 }
6415
6416 //
6417 // enqueue_unmap_mem_object(_Queue, _Mem, _WaitList) ->
6418 //
6419 //
ecl_enqueue_unmap_mem_object(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6420 static ERL_NIF_TERM ecl_enqueue_unmap_mem_object(ErlNifEnv* env, int argc,
6421 const ERL_NIF_TERM argv[])
6422 {
6423 ecl_object_t* o_queue;
6424 cl_mem memobj;
6425 cl_event wait_list[MAX_WAIT_LIST];
6426 cl_uint num_events = MAX_WAIT_LIST;
6427 cl_event event;
6428 void* mapped_ptr;
6429 cl_int err;
6430 UNUSED(argc);
6431
6432 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6433 return enif_make_badarg(env);
6434 if (!get_object(env, argv[1], &mem_r, false, (void**)&memobj))
6435 return enif_make_badarg(env);
6436 if (!get_object_list(env, argv[3], &event_r, false,
6437 (void**) wait_list, &num_events))
6438 return enif_make_badarg(env);
6439 mapped_ptr = 0; // FIXME!!!!
6440
6441 err = ECL_CALL(clEnqueueUnmapMemObject)(o_queue->queue, memobj,
6442 mapped_ptr,
6443 num_events,
6444 num_events ? wait_list : 0,
6445 &event);
6446 if (!err) {
6447 ERL_NIF_TERM t;
6448 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6449 return enif_make_tuple2(env, ATOM(ok), t);
6450 }
6451 return ecl_make_error(env, err);
6452 }
6453
6454 #if CL_VERSION_1_2 == 1
6455 //
ecl_enqueue_migrate_mem_objects(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6456 static ERL_NIF_TERM ecl_enqueue_migrate_mem_objects(ErlNifEnv* env, int argc,
6457 const ERL_NIF_TERM argv[])
6458 {
6459 ecl_object_t* o_queue;
6460 cl_uint num_mem_objects = MAX_MEM_OBJECTS;
6461 cl_mem mem_objects[MAX_MEM_OBJECTS];
6462 cl_mem_migration_flags flags = 0;
6463 cl_event wait_list[MAX_WAIT_LIST];
6464 cl_uint num_events = MAX_WAIT_LIST;
6465 cl_event event;
6466 cl_int err;
6467 UNUSED(argc);
6468
6469 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6470 return enif_make_badarg(env);
6471 if (!get_object_list(env, argv[1], &mem_r, false,
6472 (void**) mem_objects, &num_mem_objects))
6473 return enif_make_badarg(env);
6474 if (!get_bitfields(env, argv[2], &flags, kv_migration_flags))
6475 return enif_make_badarg(env);
6476
6477 if (!get_object_list(env, argv[3], &event_r, false,
6478 (void**) wait_list, &num_events))
6479 return enif_make_badarg(env);
6480
6481 err = ECL_CALL(clEnqueueMigrateMemObjects)(o_queue->queue,
6482 num_mem_objects,
6483 num_mem_objects ? mem_objects : NULL,
6484 flags,
6485 num_events,
6486 num_events ? wait_list : 0,
6487 &event);
6488 if (!err) {
6489 ERL_NIF_TERM t;
6490 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6491 return enif_make_tuple2(env, ATOM(ok), t);
6492 }
6493 return ecl_make_error(env, err);
6494 }
6495 #endif
6496
eclEnqueueBarrier(cl_command_queue queue)6497 cl_int eclEnqueueBarrier(cl_command_queue queue)
6498 {
6499 return ECL_CALL(clEnqueueBarrierWithWaitList)(queue,0,NULL,NULL);
6500 }
6501
ecl_enqueue_barrier(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6502 static ERL_NIF_TERM ecl_enqueue_barrier(ErlNifEnv* env, int argc,
6503 const ERL_NIF_TERM argv[])
6504 {
6505 ecl_object_t* o_queue;
6506 cl_int err;
6507 UNUSED(argc);
6508
6509 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6510 return enif_make_badarg(env);
6511 if(o_queue->version >= 12) {
6512 if (!(err = eclEnqueueBarrier(o_queue->queue))) {
6513 return ATOM(ok);
6514 }
6515 } else { // deprecated in 1.2, available in 1.1
6516 if (!(err = ECL_CALL(clEnqueueBarrier)(o_queue->queue))) {
6517 return ATOM(ok);
6518 }
6519 }
6520 return ecl_make_error(env, err);
6521 }
6522
6523 #if CL_VERSION_1_2 == 1
6524 //
6525 // cl:enqueue_barrier_with_wait_list(Queue::cl_queue(),
6526 // WaitList::[cl_event()]) ->
6527 // {'ok',cl_event()} | {'error', cl_error()}
6528 //
ecl_enqueue_barrier_with_wait_list(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6529 static ERL_NIF_TERM ecl_enqueue_barrier_with_wait_list(ErlNifEnv* env,
6530 int argc,
6531 const ERL_NIF_TERM argv[])
6532 {
6533 ecl_object_t* o_queue;
6534 cl_event wait_list[MAX_WAIT_LIST];
6535 cl_uint num_events = MAX_WAIT_LIST;
6536 cl_event event;
6537 cl_int err;
6538 cl_bool want_event = true; // make this an arg?
6539 UNUSED(argc);
6540
6541 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6542 return enif_make_badarg(env);
6543 if (!get_object_list(env, argv[1], &event_r, false,
6544 (void**) wait_list, &num_events))
6545 return enif_make_badarg(env);
6546 err = ECL_CALL(clEnqueueBarrierWithWaitList)(o_queue->queue,num_events,
6547 num_events ? wait_list : NULL,
6548 want_event ? &event : NULL );
6549 if (!err) {
6550 if (want_event) {
6551 ERL_NIF_TERM t;
6552 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6553 return enif_make_tuple2(env, ATOM(ok), t);
6554 }
6555 return ATOM(ok);
6556 }
6557 return ecl_make_error(env, err);
6558 }
6559
6560 //
6561 // cl:enqueue_marker_with_wait_list(Queue::cl_queue(),
6562 // WaitList::[cl_event()]) ->
6563 // {'ok',cl_event()} | {'error', cl_error()}
6564 //
ecl_enqueue_marker_with_wait_list(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6565 static ERL_NIF_TERM ecl_enqueue_marker_with_wait_list(ErlNifEnv* env,
6566 int argc,
6567 const ERL_NIF_TERM argv[])
6568 {
6569 ecl_object_t* o_queue;
6570 cl_event wait_list[MAX_WAIT_LIST];
6571 cl_uint num_events = MAX_WAIT_LIST;
6572 cl_int err;
6573 cl_event event;
6574 cl_bool want_event = true; // make this an arg?
6575 UNUSED(argc);
6576
6577 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6578 return enif_make_badarg(env);
6579 if (!get_object_list(env, argv[1], &event_r, false,
6580 (void**) wait_list, &num_events))
6581 return enif_make_badarg(env);
6582 err = ECL_CALL(clEnqueueMarkerWithWaitList)(o_queue->queue,num_events,
6583 num_events ? wait_list : NULL,
6584 want_event ? &event : NULL );
6585 if (!err) {
6586 if (want_event) {
6587 ERL_NIF_TERM t;
6588 t = ecl_make_event(env, event, false, false, 0, 0, o_queue);
6589 return enif_make_tuple2(env, ATOM(ok), t);
6590 }
6591 return ATOM(ok);
6592 }
6593 return ecl_make_error(env, err);
6594 }
6595 #endif
6596
6597
6598 //
6599 // cl:async_flush(Queue::cl_queue()) -> reference()
6600 //
ecl_async_flush(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6601 static ERL_NIF_TERM ecl_async_flush(ErlNifEnv* env, int argc,
6602 const ERL_NIF_TERM argv[])
6603 {
6604 ecl_object_t* o_queue;
6605 ecl_context_t* o_context;
6606 ecl_message_t m;
6607 ERL_NIF_TERM ref;
6608 UNUSED(argc);
6609
6610 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6611 return enif_make_badarg(env);
6612 if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context
6613 return enif_make_badarg(env);
6614 if (!(m.env = enif_alloc_env()))
6615 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
6616 ref = enif_make_ref(env);
6617
6618 m.type = ECL_MESSAGE_FLUSH;
6619 (void) enif_self(env, &m.sender);
6620 m.ref = enif_make_copy(m.env, ref);
6621 m.queue = o_queue;
6622 // keep while operation is running, release after operation in the thread
6623 enif_keep_resource(o_queue);
6624 ecl_message_send(o_context->thr, &m);
6625 return enif_make_tuple2(env, ATOM(ok), ref);
6626 }
6627
6628 //
6629 // cl:async_finish(Queue::cl_queue()) -> reference()
6630 //
ecl_async_finish(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6631 static ERL_NIF_TERM ecl_async_finish(ErlNifEnv* env, int argc,
6632 const ERL_NIF_TERM argv[])
6633 {
6634 ecl_object_t* o_queue;
6635 ecl_context_t* o_context;
6636 ecl_message_t m;
6637 ERL_NIF_TERM ref;
6638 UNUSED(argc);
6639
6640 if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))
6641 return enif_make_badarg(env);
6642 if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context
6643 return enif_make_badarg(env);
6644 if (!(m.env = enif_alloc_env()))
6645 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
6646 ref = enif_make_ref(env);
6647
6648 m.type = ECL_MESSAGE_FINISH;
6649 (void) enif_self(env, &m.sender);
6650 m.ref = enif_make_copy(m.env, ref);
6651 m.queue = o_queue;
6652 // keep while operation is running, release after operation in the thread
6653 enif_keep_resource(o_queue);
6654 ecl_message_send(o_context->thr, &m);
6655 return enif_make_tuple2(env, ATOM(ok), ref);
6656 }
6657 //
6658 // cl:async_wait_for_event(Event) -> {ok,Ref} | {error,Reason}
6659 // async reply {cl_event, Ref, Result}
6660 //
ecl_async_wait_for_event(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6661 static ERL_NIF_TERM ecl_async_wait_for_event(ErlNifEnv* env, int argc,
6662 const ERL_NIF_TERM argv[])
6663 {
6664 ecl_event_t* o_event;
6665 ecl_object_t* o_queue;
6666 ecl_context_t* o_context;
6667 ecl_message_t m;
6668 ERL_NIF_TERM ref;
6669 UNUSED(argc);
6670
6671 if (!get_ecl_object(env, argv[0],&event_r,false,(ecl_object_t**)&o_event))
6672 return enif_make_badarg(env);
6673 if (!(o_queue = o_event->obj.parent)) // queue not found !
6674 return enif_make_badarg(env);
6675 if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context
6676 return enif_make_badarg(env);
6677 if (!(m.env = enif_alloc_env()))
6678 return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem?
6679 ref = enif_make_ref(env);
6680
6681 m.type = ECL_MESSAGE_WAIT_FOR_EVENT;
6682 (void) enif_self(env, &m.sender);
6683 m.ref = enif_make_copy(m.env, ref);
6684 m.event = o_event;
6685 // keep while operation is running, release after operation in the thread
6686 enif_keep_resource(o_event);
6687 ecl_message_send(o_context->thr, &m);
6688 return enif_make_tuple2(env, ATOM(ok), ref);
6689 }
6690
6691 // return event info
ecl_get_event_info(ErlNifEnv * env,int argc,const ERL_NIF_TERM argv[])6692 static ERL_NIF_TERM ecl_get_event_info(ErlNifEnv* env, int argc,
6693 const ERL_NIF_TERM argv[])
6694 {
6695 ecl_object_t* o_event;
6696 UNUSED(argc);
6697
6698 if (!get_ecl_object(env, argv[0], &event_r, false, &o_event))
6699 return enif_make_badarg(env);
6700 return make_object_info(env, argv[1], o_event,
6701 (info_fn_t*) clGetEventInfo,
6702 event_info,
6703 sizeof_array(event_info));
6704 }
6705
get_version(char * version)6706 static cl_uint get_version(char *version)
6707 {
6708 cl_uint ver = 0;
6709 version += 7;
6710 if(*version >= 48 && *version <= 57)
6711 ver += (*version-48)*10;
6712 version++;
6713 if(*version == 46) {
6714 version++;
6715 if(*version >= 48 && *version <= 57)
6716 ver += (*version-48);
6717 }
6718 /* fprintf(stderr, "V3 %s %d\r\n", version, ver); */
6719 return ver;
6720 }
6721
6722 // pre-Load Platform Ids and Device Ids, this will make the
6723 // internal IDs kind of static for the application code. The IDs
6724 // can then be used in matching etc.
6725
ecl_pre_load(ErlNifEnv * env,ecl_env_t * ecl,cl_int * rerr)6726 static int ecl_pre_load(ErlNifEnv* env, ecl_env_t* ecl, cl_int* rerr)
6727 {
6728 cl_platform_id platform_id[MAX_PLATFORMS];
6729 cl_uint num_platforms;
6730 cl_uint i;
6731 cl_int err;
6732
6733 if ((err = ECL_CALL(clGetPlatformIDs)
6734 (MAX_PLATFORMS, platform_id, &num_platforms))) {
6735 *rerr = err;
6736 return -1;
6737 }
6738
6739 ecl->platform = enif_alloc(num_platforms*sizeof(ecl_platform_t*));
6740 ecl->nplatforms = num_platforms;
6741 ecl->icd_version = 11;
6742
6743 for (i = 0; i < num_platforms; i++) {
6744 ecl_object_t* obj;
6745 cl_device_id device_id[MAX_DEVICES];
6746 cl_uint num_devices;
6747 cl_uint j;
6748 char version[128];
6749 cl_int ver = -1;
6750
6751 if(CL_SUCCESS == ECL_CALL(clGetPlatformInfo)
6752 (platform_id[i], CL_PLATFORM_VERSION, 64, version, NULL)) {
6753 if((ver = get_version(version)) > ecl->icd_version)
6754 ecl->icd_version = ver;
6755 }
6756 obj = ecl_new(env, &platform_r,platform_id[i],0,ver);
6757 ecl->platform[i].o_platform = obj;
6758
6759 if ((err = ECL_CALL(clGetDeviceIDs)
6760 (platform_id[i], CL_DEVICE_TYPE_ALL,
6761 MAX_DEVICES, device_id, &num_devices))) {
6762 *rerr = err;
6763 return -1;
6764 }
6765 ecl->platform[i].o_device=enif_alloc(num_devices*sizeof(ecl_object_t));
6766 ecl->platform[i].ndevices = num_devices;
6767 for (j = 0; j < num_devices; j++) {
6768 ver = ecl->icd_version;
6769 if(CL_SUCCESS == ECL_CALL(clGetDeviceInfo)
6770 (device_id[j], CL_DEVICE_VERSION, 64, version, NULL)) {
6771 ver = get_version(version);
6772 }
6773 obj = ecl_new(env, &device_r, device_id[j],0, ver);
6774 ecl->platform[i].o_device[j] = obj;
6775 }
6776 }
6777
6778 return 0;
6779 }
6780
ecl_load(ErlNifEnv * env,void ** priv_data,ERL_NIF_TERM load_info)6781 static int ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
6782 {
6783 ErlNifResourceFlags tried;
6784 ecl_env_t* ecl;
6785 cl_int err;
6786 lhash_func_t func = { ref_hash, ref_cmp, ref_release, 0 };
6787 UNUSED(env);
6788 UNUSED(load_info);
6789
6790 DBG("ecl_load");
6791
6792 if (!(ecl = enif_alloc(sizeof(ecl_env_t))))
6793 return -1;
6794 ecl->ref_count = 1;
6795 ecl->context_list = NULL;
6796
6797 if (!(ecl->ref_lock = enif_rwlock_create("ref_lock")))
6798 return -1;
6799 if (!(ecl->context_list_lock = enif_rwlock_create("context_list_lock")))
6800 return -1;
6801 if (ecl_queue_init(&ecl->q) < 0)
6802 return -1;
6803 lhash_init(&ecl->ref, "ref", 2, &func);
6804
6805 DBG("ecl_load: ecl=%p", ecl);
6806 DBG("ecl_load: ecl->context_list_lock=%p", ecl->context_list_lock);
6807
6808 // load OpenCL functions
6809 if (ecl_load_dynfunctions(ecl) < 0)
6810 return -1;
6811
6812 // Load atoms
6813
6814 // General atoms
6815 LOAD_ATOM(ok);
6816 LOAD_ATOM(error);
6817 LOAD_ATOM(unknown);
6818 LOAD_ATOM(undefined);
6819 LOAD_ATOM(true);
6820 LOAD_ATOM(false);
6821
6822 // async messages
6823 LOAD_ATOM(cl_async);
6824 LOAD_ATOM(cl_event);
6825
6826 // Type names
6827 LOAD_ATOM(platform_t);
6828 LOAD_ATOM(device_t);
6829 LOAD_ATOM(context_t);
6830 LOAD_ATOM(command_queue_t);
6831 LOAD_ATOM(mem_t);
6832 LOAD_ATOM(sampler_t);
6833 LOAD_ATOM(program_t);
6834 LOAD_ATOM(kernel_t);
6835 LOAD_ATOM(event_t);
6836
6837 LOAD_ATOM(char);
6838 LOAD_ATOM(char2);
6839 LOAD_ATOM(char4);
6840 LOAD_ATOM(char8);
6841 LOAD_ATOM(char16);
6842
6843 LOAD_ATOM(uchar);
6844 LOAD_ATOM(uchar2);
6845 LOAD_ATOM(uchar4);
6846 LOAD_ATOM(uchar8);
6847 LOAD_ATOM(uchar16);
6848
6849 LOAD_ATOM(short);
6850 LOAD_ATOM(short2);
6851 LOAD_ATOM(short4);
6852 LOAD_ATOM(short8);
6853 LOAD_ATOM(short16);
6854
6855 LOAD_ATOM(ushort);
6856 LOAD_ATOM(ushort2);
6857 LOAD_ATOM(ushort4);
6858 LOAD_ATOM(ushort8);
6859 LOAD_ATOM(ushort16);
6860
6861 LOAD_ATOM(int);
6862 LOAD_ATOM(int2);
6863 LOAD_ATOM(int4);
6864 LOAD_ATOM(int8);
6865 LOAD_ATOM(int16);
6866
6867 LOAD_ATOM(uint);
6868 LOAD_ATOM(uint2);
6869 LOAD_ATOM(uint4);
6870 LOAD_ATOM(uint8);
6871 LOAD_ATOM(uint16);
6872
6873 LOAD_ATOM(long);
6874 LOAD_ATOM(long2);
6875 LOAD_ATOM(long4);
6876 LOAD_ATOM(long8);
6877 LOAD_ATOM(long16);
6878
6879 LOAD_ATOM(ulong);
6880 LOAD_ATOM(ulong2);
6881 LOAD_ATOM(ulong4);
6882 LOAD_ATOM(ulong8);
6883 LOAD_ATOM(ulong16);
6884
6885 LOAD_ATOM(half);
6886
6887 LOAD_ATOM(float);
6888 LOAD_ATOM(float2);
6889 LOAD_ATOM(float4);
6890 LOAD_ATOM(float8);
6891 LOAD_ATOM(float16);
6892
6893 LOAD_ATOM(double);
6894 LOAD_ATOM(double2);
6895 LOAD_ATOM(double4);
6896 LOAD_ATOM(double8);
6897 LOAD_ATOM(double16);
6898
6899 // records
6900 LOAD_ATOM(cl_image_desc);
6901 LOAD_ATOM(cl_image_format);
6902
6903 // channel type
6904 LOAD_ATOM(snorm_int8);
6905 LOAD_ATOM(snorm_int16);
6906 LOAD_ATOM(unorm_int8);
6907 LOAD_ATOM(unorm_int16);
6908 LOAD_ATOM(unorm_int24);
6909 LOAD_ATOM(unorm_short_565);
6910 LOAD_ATOM(unorm_short_555);
6911 LOAD_ATOM(unorm_int_101010);
6912 LOAD_ATOM(signed_int8);
6913 LOAD_ATOM(signed_int16);
6914 LOAD_ATOM(signed_int32);
6915 LOAD_ATOM(unsigned_int8);
6916 LOAD_ATOM(unsigned_int16);
6917 LOAD_ATOM(unsigned_int32);
6918 LOAD_ATOM(half_float);
6919
6920 // channel order
6921 LOAD_ATOM(r);
6922 LOAD_ATOM(a);
6923 LOAD_ATOM(rg);
6924 LOAD_ATOM(ra);
6925 LOAD_ATOM(rgb);
6926 LOAD_ATOM(rgba);
6927 LOAD_ATOM(bgra);
6928 LOAD_ATOM(argb);
6929 LOAD_ATOM(intensity);
6930 LOAD_ATOM(luminance);
6931 LOAD_ATOM(rx);
6932 LOAD_ATOM(rgx);
6933 LOAD_ATOM(rgbx);
6934 LOAD_ATOM(depth);
6935 LOAD_ATOM(depth_stencil);
6936
6937 // partition_property
6938 LOAD_ATOM(equally);
6939 LOAD_ATOM(by_counts);
6940 LOAD_ATOM(by_counts_list_end);
6941 LOAD_ATOM(by_affinity_domain);
6942
6943 // affinity_domain
6944 LOAD_ATOM(numa);
6945 LOAD_ATOM(l4_cache);
6946 LOAD_ATOM(l3_cache);
6947 LOAD_ATOM(l2_cache);
6948 LOAD_ATOM(l1_cache);
6949 LOAD_ATOM(next_partitionable);
6950
6951 // Load options & flags
6952
6953 // Device info
6954 LOAD_ATOM(type);
6955 LOAD_ATOM(vendor_id);
6956 LOAD_ATOM(max_compute_units);
6957 LOAD_ATOM(max_work_item_dimensions);
6958 LOAD_ATOM(max_work_group_size);
6959 LOAD_ATOM(max_work_item_sizes);
6960 LOAD_ATOM(preferred_vector_width_char);
6961 LOAD_ATOM(preferred_vector_width_short);
6962 LOAD_ATOM(preferred_vector_width_int);
6963 LOAD_ATOM(preferred_vector_width_long);
6964 LOAD_ATOM(preferred_vector_width_float);
6965 LOAD_ATOM(preferred_vector_width_double);
6966 LOAD_ATOM(max_clock_frequency);
6967 LOAD_ATOM(address_bits);
6968 LOAD_ATOM(max_read_image_args);
6969 LOAD_ATOM(max_write_image_args);
6970 LOAD_ATOM(max_mem_alloc_size);
6971 LOAD_ATOM(image2d_max_width);
6972 LOAD_ATOM(image2d_max_height);
6973 LOAD_ATOM(image3d_max_width);
6974 LOAD_ATOM(image3d_max_height);
6975 LOAD_ATOM(image3d_max_depth);
6976 LOAD_ATOM(image_support);
6977 LOAD_ATOM(max_parameter_size);
6978 LOAD_ATOM(max_samplers);
6979 LOAD_ATOM(mem_base_addr_align);
6980 LOAD_ATOM(min_data_type_align_size);
6981 LOAD_ATOM(single_fp_config);
6982 LOAD_ATOM(global_mem_cache_type);
6983 LOAD_ATOM(global_mem_cacheline_size);
6984 LOAD_ATOM(global_mem_cache_size);
6985 LOAD_ATOM(global_mem_size);
6986 LOAD_ATOM(max_constant_buffer_size);
6987 LOAD_ATOM(max_constant_args);
6988 LOAD_ATOM(local_mem_type);
6989 LOAD_ATOM(local_mem_size);
6990 LOAD_ATOM(error_correction_support);
6991 LOAD_ATOM(profiling_timer_resolution);
6992 LOAD_ATOM(endian_little);
6993 LOAD_ATOM(available);
6994 LOAD_ATOM(compiler_available);
6995 LOAD_ATOM(execution_capabilities);
6996 LOAD_ATOM(queue_properties);
6997 LOAD_ATOM(name);
6998 LOAD_ATOM(vendor);
6999 LOAD_ATOM(driver_version);
7000 LOAD_ATOM(profile);
7001 LOAD_ATOM(version);
7002 LOAD_ATOM(extensions);
7003 LOAD_ATOM(platform);
7004
7005 LOAD_ATOM(double_fp_config);
7006 LOAD_ATOM(half_fp_config);
7007 LOAD_ATOM(preferred_vector_width_half);
7008 LOAD_ATOM(host_unified_memory);
7009 LOAD_ATOM(native_vector_width_char);
7010 LOAD_ATOM(native_vector_width_short);
7011 LOAD_ATOM(native_vector_width_int);
7012 LOAD_ATOM(native_vector_width_long);
7013 LOAD_ATOM(native_vector_width_float);
7014 LOAD_ATOM(native_vector_width_double);
7015 LOAD_ATOM(native_vector_width_half);
7016 LOAD_ATOM(opencl_c_version);
7017 LOAD_ATOM(linker_available);
7018 LOAD_ATOM(built_in_kernels);
7019 LOAD_ATOM(image_max_buffer_size);
7020 LOAD_ATOM(image_max_array_size);
7021 LOAD_ATOM(parent_device);
7022 LOAD_ATOM(partition_max_sub_devices);
7023 LOAD_ATOM(partition_properties);
7024 LOAD_ATOM(partition_affinity_domain);
7025 LOAD_ATOM(partition_type);
7026 LOAD_ATOM(reference_count);
7027 LOAD_ATOM(preferred_interop_user_sync);
7028 LOAD_ATOM(printf_buffer_size);
7029 LOAD_ATOM(image_pitch_alignment);
7030 LOAD_ATOM(image_base_address_alignment);
7031 // cl_nv_device_attribute_query extension
7032 LOAD_ATOM(compute_capability_major_nv);
7033 LOAD_ATOM(compute_capability_minor_nv);
7034 LOAD_ATOM(registers_per_block_nv);
7035 LOAD_ATOM(warp_size_nv);
7036 LOAD_ATOM(gpu_overlap_nv);
7037 LOAD_ATOM(kernel_exec_timeout_nv);
7038 LOAD_ATOM(device_integrated_memory_nv);
7039
7040 // Platform info
7041 LOAD_ATOM(profile);
7042 LOAD_ATOM(version);
7043 LOAD_ATOM(name);
7044 LOAD_ATOM(vendor);
7045 LOAD_ATOM(extensions);
7046
7047 // Context info
7048 LOAD_ATOM(reference_count);
7049 LOAD_ATOM(devices);
7050 LOAD_ATOM(properties);
7051
7052 // Queue info
7053 LOAD_ATOM(context);
7054 LOAD_ATOM(num_devices);
7055 LOAD_ATOM(device);
7056 LOAD_ATOM(reference_count);
7057 LOAD_ATOM(properties);
7058
7059 // Mem info
7060 LOAD_ATOM(object_type);
7061 LOAD_ATOM(flags);
7062 LOAD_ATOM(size);
7063 LOAD_ATOM(host_ptr);
7064 LOAD_ATOM(map_count);
7065 LOAD_ATOM(reference_count);
7066 LOAD_ATOM(context);
7067
7068 // Image info
7069 LOAD_ATOM(format);
7070 LOAD_ATOM(element_size);
7071 LOAD_ATOM(row_pitch);
7072 LOAD_ATOM(slice_pitch);
7073 LOAD_ATOM(width);
7074 LOAD_ATOM(height);
7075 LOAD_ATOM(depth);
7076
7077 // Sampler info
7078 LOAD_ATOM(reference_count);
7079 LOAD_ATOM(context);
7080 LOAD_ATOM(normalized_coords);
7081 LOAD_ATOM(addressing_mode);
7082 LOAD_ATOM(filter_mode);
7083
7084 // Program info
7085 LOAD_ATOM(reference_count);
7086 LOAD_ATOM(context);
7087 LOAD_ATOM(num_decices);
7088 LOAD_ATOM(devices);
7089 LOAD_ATOM(source);
7090 LOAD_ATOM(binary_sizes);
7091 LOAD_ATOM(binaries);
7092
7093 // Build Info
7094 LOAD_ATOM(status);
7095 LOAD_ATOM(options);
7096 LOAD_ATOM(log);
7097 LOAD_ATOM(binary_type);
7098
7099 // Kernel Info
7100 LOAD_ATOM(function_name);
7101 LOAD_ATOM(num_args);
7102 LOAD_ATOM(reference_count);
7103 LOAD_ATOM(context);
7104 LOAD_ATOM(program);
7105
7106 // Event Info
7107 LOAD_ATOM(command_queue);
7108 LOAD_ATOM(command_type);
7109 LOAD_ATOM(reference_count);
7110 LOAD_ATOM(execution_status);
7111
7112 // Workgroup info
7113 LOAD_ATOM(work_group_size);
7114 LOAD_ATOM(compile_work_group_size);
7115 LOAD_ATOM(local_mem_size);
7116 LOAD_ATOM(preferred_work_group_size_multiple);
7117 LOAD_ATOM(private_mem_size);
7118 LOAD_ATOM(global_work_size);
7119
7120 // Error codes
7121 LOAD_ATOM(device_not_found);
7122 LOAD_ATOM(device_not_available);
7123 LOAD_ATOM(compiler_not_available);
7124 LOAD_ATOM(mem_object_allocation_failure);
7125 LOAD_ATOM(out_of_resources);
7126 LOAD_ATOM(out_of_host_memory);
7127 LOAD_ATOM(profiling_info_not_available);
7128 LOAD_ATOM(mem_copy_overlap);
7129 LOAD_ATOM(image_format_mismatch);
7130 LOAD_ATOM(image_format_not_supported);
7131 LOAD_ATOM(build_program_failure);
7132 LOAD_ATOM(map_failure);
7133 LOAD_ATOM(invalid_value);
7134 LOAD_ATOM(invalid_device_type);
7135 LOAD_ATOM(invalid_platform);
7136 LOAD_ATOM(invalid_device);
7137 LOAD_ATOM(invalid_context);
7138 LOAD_ATOM(invalid_queue_properties);
7139 LOAD_ATOM(invalid_command_queue);
7140 LOAD_ATOM(invalid_host_ptr);
7141 LOAD_ATOM(invalid_mem_object);
7142 LOAD_ATOM(invalid_image_format_descriptor);
7143 LOAD_ATOM(invalid_image_size);
7144 LOAD_ATOM(invalid_sampler);
7145 LOAD_ATOM(invalid_binary);
7146 LOAD_ATOM(invalid_build_options);
7147 LOAD_ATOM(invalid_program);
7148 LOAD_ATOM(invalid_program_executable);
7149 LOAD_ATOM(invalid_kernel_name);
7150 LOAD_ATOM(invalid_kernel_definition);
7151 LOAD_ATOM(invalid_kernel);
7152 LOAD_ATOM(invalid_arg_index);
7153 LOAD_ATOM(invalid_arg_value);
7154 LOAD_ATOM(invalid_arg_size);
7155 LOAD_ATOM(invalid_kernel_args);
7156 LOAD_ATOM(invalid_work_dimension);
7157 LOAD_ATOM(invalid_work_group_size);
7158 LOAD_ATOM(invalid_work_item_size);
7159 LOAD_ATOM(invalid_global_offset);
7160 LOAD_ATOM(invalid_event_wait_list);
7161 LOAD_ATOM(invalid_event);
7162 LOAD_ATOM(invalid_operation);
7163 LOAD_ATOM(invalid_gl_object);
7164 LOAD_ATOM(invalid_buffer_size);
7165 LOAD_ATOM(invalid_mip_level);
7166 LOAD_ATOM(invalid_global_work_size);
7167
7168 // cl_device_type
7169 LOAD_ATOM(all);
7170 LOAD_ATOM(default);
7171 LOAD_ATOM(cpu);
7172 LOAD_ATOM(gpu);
7173 LOAD_ATOM(accelerator);
7174 LOAD_ATOM(custom);
7175
7176 // fp_config
7177 LOAD_ATOM(denorm);
7178 LOAD_ATOM(inf_nan);
7179 LOAD_ATOM(round_to_nearest);
7180 LOAD_ATOM(round_to_zero);
7181 LOAD_ATOM(round_to_inf);
7182 LOAD_ATOM(fma);
7183 LOAD_ATOM(soft_float);
7184 LOAD_ATOM(correctly_rounded_divide_sqrt);
7185
7186 // mem_cache_type
7187 LOAD_ATOM(none);
7188 LOAD_ATOM(read_only);
7189 LOAD_ATOM(read_write);
7190
7191 // local_mem_type
7192 LOAD_ATOM(local);
7193 LOAD_ATOM(global);
7194
7195 // exec capability
7196 LOAD_ATOM(kernel);
7197 LOAD_ATOM(native_kernel);
7198
7199 // command_queue_properties
7200 LOAD_ATOM(out_of_order_exec_mode_enable);
7201 LOAD_ATOM(profiling_enable);
7202
7203 // mem_flags
7204 LOAD_ATOM(read_write);
7205 LOAD_ATOM(write_only);
7206 LOAD_ATOM(read_only);
7207 LOAD_ATOM(use_host_ptr);
7208 LOAD_ATOM(alloc_host_ptr);
7209 LOAD_ATOM(copy_host_ptr);
7210
7211 // migration_flags
7212 LOAD_ATOM(host);
7213 LOAD_ATOM(content_undefined);
7214
7215 // mem_object_type
7216 LOAD_ATOM(buffer);
7217 LOAD_ATOM(image2d);
7218 LOAD_ATOM(image3d);
7219 LOAD_ATOM(image2d_array);
7220 LOAD_ATOM(image1d);
7221 LOAD_ATOM(image1d_array);
7222 LOAD_ATOM(image1d_buffer);
7223
7224 // addressing_mode
7225 LOAD_ATOM(none);
7226 LOAD_ATOM(clamp_to_edge);
7227 LOAD_ATOM(clamp);
7228 LOAD_ATOM(repeat);
7229
7230 // filter_mode
7231 LOAD_ATOM(nearest);
7232 LOAD_ATOM(linear);
7233
7234 // map_flags
7235 LOAD_ATOM(read);
7236 LOAD_ATOM(write);
7237
7238 // build_status
7239 LOAD_ATOM(success);
7240 LOAD_ATOM(none);
7241 LOAD_ATOM(error);
7242 LOAD_ATOM(in_progress);
7243
7244 // program_binary_type
7245 LOAD_ATOM(none);
7246 LOAD_ATOM(compiled_object);
7247 LOAD_ATOM(library);
7248 LOAD_ATOM(executable);
7249
7250 // command_type
7251 LOAD_ATOM(ndrange_kernel);
7252 LOAD_ATOM(task);
7253 LOAD_ATOM(native_kernel);
7254 LOAD_ATOM(read_buffer);
7255 LOAD_ATOM(write_buffer);
7256 LOAD_ATOM(copy_buffer);
7257 LOAD_ATOM(read_image);
7258 LOAD_ATOM(write_image);
7259 LOAD_ATOM(copy_image);
7260 LOAD_ATOM(copy_image_to_buffer);
7261 LOAD_ATOM(copy_buffer_to_image);
7262 LOAD_ATOM(map_buffer);
7263 LOAD_ATOM(map_image);
7264 LOAD_ATOM(unmap_mem_object);
7265 LOAD_ATOM(marker);
7266 LOAD_ATOM(aquire_gl_objects);
7267 LOAD_ATOM(release_gl_objects);
7268 LOAD_ATOM(migreate_mem_objects);
7269 LOAD_ATOM(fill_buffer);
7270 LOAD_ATOM(fill_image);
7271
7272 // execution_status
7273 LOAD_ATOM(complete);
7274 LOAD_ATOM(running);
7275 LOAD_ATOM(submitted);
7276 LOAD_ATOM(queued);
7277
7278 // arguments
7279 LOAD_ATOM(region);
7280
7281 LOAD_ATOM(global);
7282 LOAD_ATOM(local);
7283 LOAD_ATOM(constant);
7284 LOAD_ATOM(private);
7285
7286 LOAD_ATOM(read_only);
7287 LOAD_ATOM(write_only);
7288 LOAD_ATOM(read_write);
7289 LOAD_ATOM(none);
7290
7291 LOAD_ATOM(none);
7292 LOAD_ATOM(const);
7293 LOAD_ATOM(restrict);
7294 LOAD_ATOM(volatile);
7295
7296 LOAD_ATOM(address_qualifier);
7297 LOAD_ATOM(access_qualifier);
7298 LOAD_ATOM(type_name);
7299 LOAD_ATOM(type_qualifier);
7300 LOAD_ATOM(name);
7301
7302 // Create resource types
7303 ecl_resource_init(env, &platform_r, "platform_t",
7304 sizeof(ecl_object_t),
7305 ecl_platform_dtor,
7306 ERL_NIF_RT_CREATE, &tried);
7307 ecl_resource_init(env, &device_r, "device_t",
7308 sizeof(ecl_object_t),
7309 ecl_device_dtor,
7310 ERL_NIF_RT_CREATE, &tried);
7311 ecl_resource_init(env, &context_r, "context_t",
7312 sizeof(ecl_context_t), // NOTE! specialized!
7313 ecl_context_dtor,
7314 ERL_NIF_RT_CREATE, &tried);
7315 ecl_resource_init(env, &command_queue_r, "command_queue_t",
7316 sizeof(ecl_object_t),
7317 ecl_queue_dtor,
7318 ERL_NIF_RT_CREATE, &tried);
7319 ecl_resource_init(env, &mem_r, "mem_t",
7320 sizeof(ecl_object_t),
7321 ecl_mem_dtor,
7322 ERL_NIF_RT_CREATE, &tried);
7323 ecl_resource_init(env, &sampler_r, "sampler_t",
7324 sizeof(ecl_object_t),
7325 ecl_sampler_dtor,
7326 ERL_NIF_RT_CREATE, &tried);
7327 ecl_resource_init(env, &program_r, "program_t",
7328 sizeof(ecl_object_t),
7329 ecl_program_dtor,
7330 ERL_NIF_RT_CREATE, &tried);
7331 ecl_resource_init(env, &kernel_r, "kernel_t",
7332 sizeof(ecl_kernel_t), // NOTE! specialized!
7333 ecl_kernel_dtor,
7334 ERL_NIF_RT_CREATE, &tried);
7335 ecl_resource_init(env, &event_r, "event_t",
7336 sizeof(ecl_event_t), // NOTE! specialized!
7337 ecl_event_dtor,
7338 ERL_NIF_RT_CREATE, &tried);
7339 *priv_data = ecl;
7340
7341 if (ecl_pre_load(env, ecl, &err) < 0) {
7342 CL_ERROR("ecl_pre_load: error code = %d", err);
7343 }
7344
7345 return 0;
7346 }
7347
7348 #ifdef WIN32
7349 #define RTLD_LAZY 0
7350 #define OPENCL_LIB "opencl.dll"
7351 typedef HMODULE DL_LIB_P;
dlsym(HMODULE Lib,const char * func)7352 void * dlsym(HMODULE Lib, const char *func) {
7353 return (void *) GetProcAddress(Lib, func);
7354 }
7355
dlopen(const CHAR * DLL,int unused)7356 HMODULE dlopen(const CHAR *DLL, int unused) {
7357 UNUSED(unused);
7358 return LoadLibrary(DLL);
7359 }
7360 #else
7361 typedef void * DL_LIB_P;
7362 # ifdef DARWIN
7363 # define OPENCL_LIB "/System/Library/Frameworks/OpenCL.framework/OpenCL"
7364 # else
7365 # define OPENCL_LIB "libOpenCL.so"
7366 # endif
7367 #endif
7368
ecl_load_dynfunctions(ecl_env_t * ecl)7369 static int ecl_load_dynfunctions(ecl_env_t* ecl)
7370 {
7371 DL_LIB_P handle;
7372 // if(ecl->icd_version < 12)
7373 // return;
7374 if ((handle = dlopen(OPENCL_LIB, RTLD_LAZY))) {
7375 int i = 0;
7376
7377 while(ecl_function[i].name != NULL) {
7378 if (ecl_function[i].func != NULL) {
7379 fprintf(stderr, "function %s already loaded\r\n",
7380 ecl_function[i].name);
7381 }
7382 else {
7383 ecl_function[i].func = dlsym(handle, ecl_function[i].name);
7384 if (ecl_function[i].func == NULL) {
7385 #ifdef DEBUG
7386 fprintf(stderr, "unabled to load function %s\r\n",
7387 ecl_function[i].name);
7388 #endif
7389 }
7390 else {
7391 #ifdef DEBUG
7392 fprintf(stderr, "load function %s/%d.%d @ %p\r\n",
7393 ecl_function[i].name,
7394 ecl_function[i].version / 10,
7395 ecl_function[i].version % 10,
7396 ecl_function[i].func);
7397 #endif
7398 }
7399 }
7400 i++;
7401 }
7402 if (ecl_function[i_clCreateImage].func == NULL)
7403 ecl_function[i_clCreateImage].func = e_clCreateImage;
7404
7405 // patch functions not present or deprecated functions when possible
7406 if (ecl->icd_version >= 12) {
7407 ecl_function[i_clCreateImage2D].func = eclCreateImage2D;
7408 ecl_function[i_clCreateImage3D].func = eclCreateImage3D;
7409 }
7410 return 0;
7411 }
7412 fprintf(stderr, "Failed open OpenCL dynamic library\r\n");
7413 return -1;
7414 }
7415
ecl_upgrade(ErlNifEnv * env,void ** priv_data,void ** old_priv_data,ERL_NIF_TERM load_info)7416 static int ecl_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data,
7417 ERL_NIF_TERM load_info)
7418 {
7419 ErlNifResourceFlags tried;
7420 ecl_context_t* ctx;
7421 ecl_env_t* ecl = (ecl_env_t*) *old_priv_data;
7422 int sync_count;
7423 UNUSED(load_info);
7424
7425 ecl->ref_count++;
7426 DBG("ecl_upgrade: ecl=%p", ecl, ecl->ref_count);
7427
7428 // upgrade resource types
7429 ecl_resource_init(env, &platform_r, "platform_t",
7430 sizeof(ecl_object_t),
7431 ecl_platform_dtor,
7432 ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7433 ecl_resource_init(env, &device_r, "device_t",
7434 sizeof(ecl_object_t),
7435 ecl_device_dtor,
7436 ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7437
7438 ecl_resource_init(env, &command_queue_r, "command_queue_t",
7439 sizeof(ecl_object_t),
7440 ecl_queue_dtor,
7441 ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7442 ecl_resource_init(env, &mem_r, "mem_t",
7443 sizeof(ecl_object_t),
7444 ecl_mem_dtor,
7445 ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7446 ecl_resource_init(env, &sampler_r, "sampler_t",
7447 sizeof(ecl_object_t),
7448 ecl_sampler_dtor,
7449 ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7450 ecl_resource_init(env, &program_r, "program_t",
7451 sizeof(ecl_object_t),
7452 ecl_program_dtor,
7453 ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7454 ecl_resource_init(env, &kernel_r, "kernel_t",
7455 sizeof(ecl_kernel_t), // NOTE! specialized!
7456 ecl_kernel_dtor,
7457 ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7458 ecl_resource_init(env, &event_r, "event_t",
7459 sizeof(ecl_event_t), // NOTE! specialized!
7460 ecl_event_dtor,
7461 ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7462
7463 ecl_resource_init(env, &context_r, "context_t",
7464 sizeof(ecl_context_t), // NOTE! specialized!
7465 ecl_context_dtor,
7466 ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);
7467
7468 // Scan through all contexts and initiate upgrade & sync of the threads
7469 DBG("ecl_upgrade: upgrade and sync ecl=%p", ecl);
7470 DBG("ecl_upgrade: upgrade and sync ecl->context_list_lock=%p",
7471 ecl->context_list_lock);
7472 sync_count = 0;
7473 enif_rwlock_rwlock(ecl->context_list_lock);
7474 for (ctx = ecl->context_list; ctx != NULL; ctx = ctx->next) {
7475 ecl_message_t m;
7476 DBG("ecl_upgrade: ctx=%p", ctx);
7477 m.type = ECL_MESSAGE_UPGRADE;
7478 m.upgrade = ecl_context_main;
7479 DBG("ecl_upgrade: send upgrade func=%p to %p",
7480 ecl_context_main, ctx->thr);
7481 ecl_message_send(ctx->thr, &m);
7482
7483 m.type = ECL_MESSAGE_SYNC;
7484 DBG("ecl_upgrade: send sync to %p", ctx->thr);
7485 ecl_message_send(ctx->thr, &m);
7486 sync_count++;
7487 }
7488 enif_rwlock_rwunlock(ecl->context_list_lock);
7489
7490 while(sync_count) {
7491 ecl_message_t m;
7492 int r;
7493 if ((r = ecl_queue_get(&ecl->q, &m)) < 0)
7494 return -1;
7495 if (m.type != ECL_MESSAGE_SYNC_ACK)
7496 return -1;
7497 sync_count--;
7498 }
7499
7500 *priv_data = *old_priv_data;
7501 return 0;
7502 }
7503
ecl_unload(ErlNifEnv * env,void * priv_data)7504 static void ecl_unload(ErlNifEnv* env, void* priv_data)
7505 {
7506 ecl_env_t* ecl = priv_data;
7507 UNUSED(env);
7508
7509 ecl->ref_count--;
7510 DBG("ecl_unload: ecl=%p ref_count=%d", ecl, ecl->ref_count);
7511 if (ecl->ref_count == 0) {
7512 cl_uint i;
7513 cl_uint j;
7514
7515 for (i = 0; i < ecl->nplatforms; i++) {
7516 ecl_object_t* obj;
7517
7518 for (j = 0; j < ecl->platform[i].ndevices; j++) {
7519 obj = ecl->platform[i].o_device[j];
7520 enif_release_resource(obj);
7521 }
7522 enif_free(ecl->platform[i].o_device);
7523
7524 obj = ecl->platform[i].o_platform;
7525 enif_release_resource(obj);
7526 }
7527 enif_free(ecl->platform);
7528
7529 enif_rwlock_rwlock(ecl->ref_lock);
7530 lhash_delete(&ecl->ref);
7531 enif_rwlock_rwunlock(ecl->ref_lock);
7532
7533 enif_rwlock_destroy(ecl->ref_lock);
7534
7535 enif_rwlock_rwlock(ecl->context_list_lock);
7536 DBG("ecl->context_list = %p", ecl->context_list);
7537 enif_rwlock_rwunlock(ecl->context_list_lock);
7538
7539 enif_rwlock_destroy(ecl->context_list_lock);
7540
7541 enif_free(ecl);
7542 }
7543 }
7544
7545 /*
7546 #warning "testing only, REMOVE before release"
7547 #define ERL_NIF_INIT_BODY \
7548 DBG("erl_nif_init")
7549 */
7550
7551 ERL_NIF_INIT(cl, ecl_funcs,
7552 ecl_load, NULL,
7553 ecl_upgrade, ecl_unload)
7554