1 // runtime.cpp (Oclgrind)
2 // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith,
3 // University of Bristol. All rights reserved.
4 //
5 // This program is provided under a three-clause BSD license. For full
6 // license terms please see the LICENSE file distributed with this
7 // source code.
8 
9 #include "config.h"
10 
11 #include <cassert>
12 #include <cmath>
13 #include <cstring>
14 #include <iostream>
15 #include <sstream>
16 
17 #include "async_queue.h"
18 #include "icd.h"
19 
20 #include "CL/cl_half.h"
21 #include "core/Context.h"
22 #include "core/Kernel.h"
23 #include "core/Memory.h"
24 #include "core/Program.h"
25 #include "core/Queue.h"
26 
27 using namespace std;
28 
29 #define DEFAULT_GLOBAL_MEM_SIZE (128 * 1048576)
30 #define DEFAULT_CONSTANT_MEM_SIZE (65536)
31 #define DEFAULT_LOCAL_MEM_SIZE (32768)
32 #define DEFAULT_MAX_WGSIZE (1024)
33 
34 #define PLATFORM_NAME "Oclgrind"
35 #define PLATFORM_VENDOR "Oclgrind"
36 #ifdef ENABLE_OPENCL_3
37 #define PLATFORM_VERSION "OpenCL 3.0 (Oclgrind " PACKAGE_VERSION ")"
38 #else
39 #define PLATFORM_VERSION "OpenCL 1.2 (Oclgrind " PACKAGE_VERSION ")"
40 #endif
41 #define PLATFORM_PROFILE "FULL_PROFILE"
42 #define PLATFORM_SUFFIX "oclg"
43 
44 #define DEVICE_NAME "Oclgrind Simulator"
45 #define DEVICE_VENDOR "Oclgrind"
46 #define DEVICE_VENDOR_ID 0x0042
47 #ifdef ENABLE_OPENCL_3
48 #define DEVICE_VERSION "OpenCL 3.0 (Oclgrind " PACKAGE_VERSION ")"
49 #else
50 #define DEVICE_VERSION "OpenCL 1.2 (Oclgrind " PACKAGE_VERSION ")"
51 #endif
52 #define DEVICE_LANG_VERSION "OpenCL C 1.2 (Oclgrind " PACKAGE_VERSION ")"
53 #define DRIVER_VERSION "Oclgrind " PACKAGE_VERSION
54 #define DEVICE_PROFILE "FULL_PROFILE"
55 #define DEVICE_CTS_VERSION "v0000-01-01-00"
56 #define DEVICE_SPIR_VERSIONS "1.2"
57 #define DEVICE_TYPE                                                            \
58   (CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR |      \
59    CL_DEVICE_TYPE_DEFAULT)
60 
61 namespace
62 {
63 #define CASE(X)                                                                \
64   case X:                                                                      \
65     return #X;
CLErrorToString(cl_int err)66 const char* CLErrorToString(cl_int err)
67 {
68   switch (err)
69   {
70     CASE(CL_SUCCESS)
71     CASE(CL_DEVICE_NOT_FOUND)
72     CASE(CL_DEVICE_NOT_AVAILABLE)
73     CASE(CL_COMPILER_NOT_AVAILABLE)
74     CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE)
75     CASE(CL_OUT_OF_RESOURCES)
76     CASE(CL_OUT_OF_HOST_MEMORY)
77     CASE(CL_PROFILING_INFO_NOT_AVAILABLE)
78     CASE(CL_MEM_COPY_OVERLAP)
79     CASE(CL_IMAGE_FORMAT_MISMATCH)
80     CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED)
81     CASE(CL_BUILD_PROGRAM_FAILURE)
82     CASE(CL_MAP_FAILURE)
83     CASE(CL_MISALIGNED_SUB_BUFFER_OFFSET)
84     CASE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
85     CASE(CL_COMPILE_PROGRAM_FAILURE)
86     CASE(CL_LINKER_NOT_AVAILABLE)
87     CASE(CL_LINK_PROGRAM_FAILURE)
88     CASE(CL_DEVICE_PARTITION_FAILED)
89     CASE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE)
90     CASE(CL_INVALID_VALUE)
91     CASE(CL_INVALID_DEVICE_TYPE)
92     CASE(CL_INVALID_PLATFORM)
93     CASE(CL_INVALID_DEVICE)
94     CASE(CL_INVALID_CONTEXT)
95     CASE(CL_INVALID_QUEUE_PROPERTIES)
96     CASE(CL_INVALID_COMMAND_QUEUE)
97     CASE(CL_INVALID_HOST_PTR)
98     CASE(CL_INVALID_MEM_OBJECT)
99     CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
100     CASE(CL_INVALID_IMAGE_SIZE)
101     CASE(CL_INVALID_SAMPLER)
102     CASE(CL_INVALID_BINARY)
103     CASE(CL_INVALID_BUILD_OPTIONS)
104     CASE(CL_INVALID_PROGRAM)
105     CASE(CL_INVALID_PROGRAM_EXECUTABLE)
106     CASE(CL_INVALID_KERNEL_NAME)
107     CASE(CL_INVALID_KERNEL_DEFINITION)
108     CASE(CL_INVALID_KERNEL)
109     CASE(CL_INVALID_ARG_INDEX)
110     CASE(CL_INVALID_ARG_VALUE)
111     CASE(CL_INVALID_ARG_SIZE)
112     CASE(CL_INVALID_KERNEL_ARGS)
113     CASE(CL_INVALID_WORK_DIMENSION)
114     CASE(CL_INVALID_WORK_GROUP_SIZE)
115     CASE(CL_INVALID_WORK_ITEM_SIZE)
116     CASE(CL_INVALID_GLOBAL_OFFSET)
117     CASE(CL_INVALID_EVENT_WAIT_LIST)
118     CASE(CL_INVALID_EVENT)
119     CASE(CL_INVALID_OPERATION)
120     CASE(CL_INVALID_GL_OBJECT)
121     CASE(CL_INVALID_BUFFER_SIZE)
122     CASE(CL_INVALID_MIP_LEVEL)
123     CASE(CL_INVALID_GLOBAL_WORK_SIZE)
124     CASE(CL_INVALID_PROPERTY)
125     CASE(CL_INVALID_IMAGE_DESCRIPTOR)
126     CASE(CL_INVALID_COMPILER_OPTIONS)
127     CASE(CL_INVALID_LINKER_OPTIONS)
128     CASE(CL_INVALID_DEVICE_PARTITION_COUNT)
129   }
130   return "Unknown";
131 }
132 #undef CASE
133 
notifyAPIError(cl_context context,cl_int err,const char * function,string info="")134 void notifyAPIError(cl_context context, cl_int err, const char* function,
135                     string info = "")
136 {
137   // Remove leading underscore from function name if necessary
138   if (!strncmp(function, "_cl", 3))
139   {
140     function++;
141   }
142 
143   // Build error message
144   ostringstream oss;
145   oss << endl
146       << "Oclgrind - OpenCL runtime error detected" << endl
147       << "\tFunction: " << function << endl
148       << "\tError:    " << CLErrorToString(err) << endl;
149   if (!info.empty())
150   {
151     oss << "\t" << info << endl;
152   }
153   string error = oss.str();
154 
155   // Output message to stderr if required
156   if (oclgrind::checkEnv("OCLGRIND_CHECK_API"))
157   {
158     cerr << error << endl;
159   }
160 
161   // Fire context callback if set
162   if (context && context->notify)
163   {
164     context->notify(error.c_str(), context->data, 0, NULL);
165   }
166 }
167 
releaseCommand(oclgrind::Command * command)168 void releaseCommand(oclgrind::Command* command)
169 {
170   if (command)
171   {
172     asyncQueueRelease(command);
173 
174     // Release dependent commands
175     while (!command->execBefore.empty())
176     {
177       oclgrind::Command* cmd = command->execBefore.front();
178       command->execBefore.pop_front();
179       releaseCommand(cmd);
180     }
181 
182     delete command;
183   }
184 }
185 } // namespace
186 
187 namespace
188 {
189 // Name of the API function currently being executed
190 thread_local static std::stack<const char*> g_apiCallStack;
191 
192 class APICallEntry
193 {
194 public:
APICallEntry(const char * name)195   APICallEntry(const char* name)
196   {
197     g_apiCallStack.push(name);
198   }
~APICallEntry()199   ~APICallEntry()
200   {
201     g_apiCallStack.pop();
202   }
203 };
204 
205 #define REGISTER_API APICallEntry apiCallEntry(__func__)
206 } // namespace
207 
208 #define ReturnErrorInfo(context, err, info)                                    \
209   {                                                                            \
210     ostringstream oss;                                                         \
211     oss << info;                                                               \
212     notifyAPIError(context, err, g_apiCallStack.top(), oss.str());             \
213     return err;                                                                \
214   }
215 #define ReturnErrorArg(context, err, arg)                                      \
216   ReturnErrorInfo(context, err, "For argument '" #arg "'")
217 #define ReturnError(context, err) ReturnErrorInfo(context, err, "")
218 
219 #define SetErrorInfo(context, err, info)                                       \
220   if (err != CL_SUCCESS)                                                       \
221   {                                                                            \
222     ostringstream oss;                                                         \
223     oss << info;                                                               \
224     notifyAPIError(context, err, g_apiCallStack.top(), oss.str());             \
225   }                                                                            \
226   if (errcode_ret)                                                             \
227   {                                                                            \
228     *errcode_ret = err;                                                        \
229   }
230 #define SetErrorArg(context, err, arg)                                         \
231   SetErrorInfo(context, err, "For argument '" #arg "'")
232 #define SetError(context, err) SetErrorInfo(context, err, "")
233 
234 #define ParamValueSizeTooSmall                                                 \
235   "param_value_size is " << param_value_size << ", but result requires "       \
236                          << result_size << " bytes"
237 
238 static struct _cl_platform_id* m_platform = NULL;
239 static struct _cl_device_id* m_device = NULL;
240 
clIcdGetPlatformIDsKHR(cl_uint num_entries,cl_platform_id * platforms,cl_uint * num_platforms)241 CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(
242   cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms)
243 {
244   REGISTER_API;
245 
246   if (platforms && num_entries < 1)
247   {
248     ReturnError(NULL, CL_INVALID_VALUE);
249   }
250 
251   if (!m_platform)
252   {
253     m_platform = new _cl_platform_id;
254     m_platform->dispatch = m_dispatchTable;
255 
256     m_device = new _cl_device_id;
257     m_device->dispatch = m_dispatchTable;
258     m_device->globalMemSize = oclgrind::getEnvInt(
259       "OCLGRIND_GLOBAL_MEM_SIZE", DEFAULT_GLOBAL_MEM_SIZE, false);
260     m_device->constantMemSize = oclgrind::getEnvInt(
261       "OCLGRIND_CONSTANT_MEM_SIZE", DEFAULT_CONSTANT_MEM_SIZE, false);
262     m_device->localMemSize = oclgrind::getEnvInt("OCLGRIND_LOCAL_MEM_SIZE",
263                                                  DEFAULT_LOCAL_MEM_SIZE, false);
264     m_device->maxWGSize =
265       oclgrind::getEnvInt("OCLGRIND_MAX_WGSIZE", DEFAULT_MAX_WGSIZE, false);
266   }
267 
268   if (platforms)
269   {
270     platforms[0] = m_platform;
271   }
272 
273   if (num_platforms)
274   {
275     *num_platforms = 1;
276   }
277 
278   return CL_SUCCESS;
279 }
280 
281 ////////////////////////////////////
282 // OpenCL Runtime API Definitions //
283 ////////////////////////////////////
284 
285 CL_API_ENTRY void* CL_API_CALL
clGetExtensionFunctionAddress(const char * funcname)286 clGetExtensionFunctionAddress(const char* funcname) CL_API_SUFFIX__VERSION_1_2
287 {
288   REGISTER_API;
289 
290   if (strcmp(funcname, "clIcdGetPlatformIDsKHR") == 0)
291   {
292     return (void*)clIcdGetPlatformIDsKHR;
293   }
294   else if (strcmp(funcname, "clGetPlatformInfo") == 0)
295   {
296     return (void*)clGetPlatformInfo;
297   }
298   else
299   {
300     return NULL;
301   }
302 }
303 
304 CL_API_ENTRY cl_int CL_API_CALL
clGetPlatformIDs(cl_uint num_entries,cl_platform_id * platforms,cl_uint * num_platforms)305 clGetPlatformIDs(cl_uint num_entries, cl_platform_id* platforms,
306                  cl_uint* num_platforms) CL_API_SUFFIX__VERSION_1_0
307 {
308   REGISTER_API;
309 
310   return clIcdGetPlatformIDsKHR(num_entries, platforms, num_platforms);
311 }
312 
clGetPlatformInfo(cl_platform_id platform,cl_platform_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)313 CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo(
314   cl_platform_id platform, cl_platform_info param_name, size_t param_value_size,
315   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
316 {
317   REGISTER_API;
318 
319   // All possible return types
320   union
321   {
322     cl_ulong clulong;
323   } result_data;
324   size_t result_size = 0;
325   const void* data = NULL;
326 
327   static constexpr char extensions[] = "cl_khr_icd";
328   static constexpr cl_version numeric_version = CL_MAKE_VERSION(3, 0, 0);
329   static constexpr cl_name_version extension_versions[] = {
330     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_icd"},
331   };
332 
333   // Select platform info
334   switch (param_name)
335   {
336   case CL_PLATFORM_PROFILE:
337     data = PLATFORM_PROFILE;
338     result_size = strlen(static_cast<const char*>(data)) + 1;
339     break;
340   case CL_PLATFORM_VERSION:
341     data = PLATFORM_VERSION;
342     result_size = strlen(static_cast<const char*>(data)) + 1;
343     break;
344   case CL_PLATFORM_NAME:
345     data = PLATFORM_NAME;
346     result_size = strlen(static_cast<const char*>(data)) + 1;
347     break;
348   case CL_PLATFORM_VENDOR:
349     data = PLATFORM_VENDOR;
350     result_size = strlen(static_cast<const char*>(data)) + 1;
351     break;
352   case CL_PLATFORM_EXTENSIONS:
353     data = extensions;
354     result_size = strlen(static_cast<const char*>(data)) + 1;
355     break;
356   case CL_PLATFORM_ICD_SUFFIX_KHR:
357     data = PLATFORM_SUFFIX;
358     result_size = strlen(static_cast<const char*>(data)) + 1;
359     break;
360   case CL_PLATFORM_NUMERIC_VERSION:
361     result_size = sizeof(numeric_version);
362     data = &numeric_version;
363     break;
364   case CL_PLATFORM_EXTENSIONS_WITH_VERSION:
365     result_size = sizeof(extension_versions);
366     data = extension_versions;
367     break;
368   case CL_PLATFORM_HOST_TIMER_RESOLUTION:
369     result_size = sizeof(cl_ulong);
370     result_data.clulong = 0;
371     break;
372   default:
373     ReturnErrorArg(NULL, CL_INVALID_VALUE, param_name);
374   }
375 
376   // Compute size of result
377   if (param_value_size_ret)
378   {
379     *param_value_size_ret = result_size;
380   }
381 
382   // Return result
383   if (param_value)
384   {
385     // Check destination is large enough
386     if (param_value_size < result_size)
387     {
388       ReturnErrorInfo(NULL, CL_INVALID_VALUE, ParamValueSizeTooSmall);
389     }
390     else
391     {
392       if (data)
393         memcpy(param_value, data, result_size);
394       else
395         memcpy(param_value, &result_data, result_size);
396     }
397   }
398 
399   return CL_SUCCESS;
400 }
401 
clGetDeviceIDs(cl_platform_id platform,cl_device_type device_type,cl_uint num_entries,cl_device_id * devices,cl_uint * num_devices)402 CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDs(
403   cl_platform_id platform, cl_device_type device_type, cl_uint num_entries,
404   cl_device_id* devices, cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_0
405 {
406   REGISTER_API;
407 
408   // Check parameters
409   if (devices && num_entries < 1)
410   {
411     ReturnError(NULL, CL_INVALID_VALUE);
412   }
413 
414   if (!(device_type & DEVICE_TYPE))
415   {
416     ReturnError(NULL, CL_DEVICE_NOT_FOUND);
417   }
418 
419   if (devices)
420   {
421     *devices = m_device;
422   }
423 
424   if (num_devices)
425   {
426     *num_devices = 1;
427   }
428 
429   return CL_SUCCESS;
430 }
431 
clGetDeviceInfo(cl_device_id device,cl_device_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)432 CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
433   cl_device_id device, cl_device_info param_name, size_t param_value_size,
434   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
435 {
436   REGISTER_API;
437 
438   // Check device is valid
439   if (device != m_device)
440   {
441     ReturnErrorArg(NULL, CL_INVALID_DEVICE, device);
442   }
443 
444   size_t dummy;
445   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
446   // All possible return types
447   union
448   {
449     cl_uint cluint;
450     size_t sizet;
451     size_t sizet3[3];
452     cl_ulong clulong;
453     cl_bool clbool;
454     cl_device_id cldeviceid;
455     cl_device_type cldevicetype;
456     cl_device_fp_config devicefpconfig;
457     cl_device_mem_cache_type devicememcachetype;
458     cl_device_local_mem_type devicelocalmemtype;
459     cl_device_exec_capabilities cldevexeccap;
460     cl_command_queue_properties clcmdqprop;
461     cl_platform_id clplatid;
462     cl_version clversion;
463     cl_device_partition_property cldevpartprop;
464     cl_device_affinity_domain cldevaffdom;
465     cl_device_svm_capabilities svm;
466     cl_device_atomic_capabilities atomiccaps;
467     cl_device_device_enqueue_capabilities devenqcaps;
468   } result_data;
469   // The result is data in memory that needs copying
470   const void* data = 0;
471 
472   static constexpr char extensions[] = " cl_khr_spir"
473                                        " cl_khr_3d_image_writes"
474                                        " cl_khr_global_int32_base_atomics"
475                                        " cl_khr_global_int32_extended_atomics"
476                                        " cl_khr_local_int32_base_atomics"
477                                        " cl_khr_local_int32_extended_atomics"
478                                        " cl_khr_int64_base_atomics"
479                                        " cl_khr_int64_extended_atomics"
480                                        " cl_khr_byte_addressable_store"
481                                        " cl_khr_fp64";
482 
483   static constexpr cl_name_version extension_versions[] = {
484     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_spir"},
485     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_3d_image_writes"},
486     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_global_int32_base_atomics"},
487     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_global_int32_extended_atomics"},
488     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_local_int32_base_atomics"},
489     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_local_int32_extended_atomics"},
490     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_int64_base_atomics"},
491     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_int64_extended_atomics"},
492     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_byte_addressable_store"},
493     {CL_MAKE_VERSION(1, 0, 0), "cl_khr_fp64"},
494   };
495 
496   static constexpr cl_name_version opencl_c_all_versions[] = {
497     {CL_MAKE_VERSION(1, 0, 0), "OpenCL C"},
498     {CL_MAKE_VERSION(1, 1, 0), "OpenCL C"},
499     {CL_MAKE_VERSION(1, 2, 0), "OpenCL C"},
500     {CL_MAKE_VERSION(3, 0, 0), "OpenCL C"},
501   };
502 
503   // TODO: Populate this
504   // static constexpr cl_name_version il_versions[] = {};
505 
506   // TODO: Populate this
507   // static constexpr cl_name_version built_in_kernel_versions[] = {};
508 
509   // TODO: Populate this
510   // static constexpr cl_name_version opencl_c_features[] = {};
511 
512   switch (param_name)
513   {
514   case CL_DEVICE_TYPE:
515     result_size = sizeof(cl_device_type);
516     result_data.cldevicetype = DEVICE_TYPE;
517     break;
518   case CL_DEVICE_VENDOR_ID:
519     result_size = sizeof(cl_uint);
520     result_data.cluint = DEVICE_VENDOR_ID;
521     break;
522   case CL_DEVICE_MAX_COMPUTE_UNITS:
523     result_size = sizeof(cl_uint);
524     result_data.cluint =
525       oclgrind::getEnvInt("OCLGRIND_COMPUTE_UNITS", 1, false);
526     break;
527   case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
528     result_size = sizeof(cl_uint);
529     result_data.cluint = 3;
530     break;
531   case CL_DEVICE_MAX_WORK_GROUP_SIZE:
532     result_size = sizeof(size_t);
533     result_data.sizet = m_device->maxWGSize;
534     break;
535   case CL_DEVICE_MAX_WORK_ITEM_SIZES:
536     result_size = 3 * sizeof(size_t);
537     result_data.sizet3[0] = m_device->maxWGSize;
538     result_data.sizet3[1] = m_device->maxWGSize;
539     result_data.sizet3[2] = m_device->maxWGSize;
540     break;
541   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
542   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
543   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
544   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
545   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
546   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
547     result_size = sizeof(cl_uint);
548     result_data.cluint = 1;
549     break;
550   case CL_DEVICE_MAX_CLOCK_FREQUENCY:
551     result_size = sizeof(cl_uint);
552     result_data.cluint = 1;
553     break;
554   case CL_DEVICE_ADDRESS_BITS:
555     result_size = sizeof(cl_uint);
556     result_data.cluint = sizeof(size_t) << 3;
557     break;
558   case CL_DEVICE_MAX_READ_IMAGE_ARGS:
559     result_size = sizeof(cl_uint);
560     result_data.cluint = 128;
561     break;
562   case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
563     result_size = sizeof(cl_uint);
564     result_data.cluint = 64;
565     break;
566   case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS:
567     result_size = sizeof(cl_uint);
568     result_data.cluint = 64;
569     break;
570   case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
571     result_size = sizeof(cl_ulong);
572     result_data.clulong = m_device->globalMemSize;
573     break;
574   case CL_DEVICE_IMAGE2D_MAX_WIDTH:
575   case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
576     result_size = sizeof(size_t);
577     result_data.sizet = 8192;
578     break;
579   case CL_DEVICE_IMAGE3D_MAX_WIDTH:
580   case CL_DEVICE_IMAGE3D_MAX_DEPTH:
581   case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
582     result_size = sizeof(size_t);
583     result_data.sizet = 2048;
584     break;
585   case CL_DEVICE_IMAGE_SUPPORT:
586     result_size = sizeof(cl_bool);
587     result_data.clbool = CL_TRUE;
588     break;
589   case CL_DEVICE_MAX_PARAMETER_SIZE:
590     result_size = sizeof(size_t);
591     result_data.sizet = 1024;
592     break;
593   case CL_DEVICE_MAX_SAMPLERS:
594     result_size = sizeof(cl_uint);
595     result_data.cluint = 16;
596     break;
597   case CL_DEVICE_IMAGE_PITCH_ALIGNMENT:
598     result_size = sizeof(cl_uint);
599     result_data.cluint = 0;
600     break;
601   case CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT:
602     result_size = sizeof(cl_uint);
603     result_data.cluint = 0;
604     break;
605   case CL_DEVICE_MAX_PIPE_ARGS:
606     result_size = sizeof(cl_uint);
607     result_data.cluint = 0;
608     break;
609   case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS:
610     result_size = sizeof(cl_uint);
611     result_data.cluint = 0;
612     break;
613   case CL_DEVICE_PIPE_MAX_PACKET_SIZE:
614     result_size = sizeof(cl_uint);
615     result_data.cluint = 0;
616     break;
617   case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
618     result_size = sizeof(cl_uint);
619     result_data.cluint = sizeof(cl_long16) << 3;
620     break;
621   case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:
622     result_size = sizeof(cl_uint);
623     result_data.cluint = 1;
624     break;
625   case CL_DEVICE_SINGLE_FP_CONFIG:
626     result_size = sizeof(cl_device_fp_config);
627     result_data.devicefpconfig =
628       CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN | CL_FP_DENORM;
629     break;
630   case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
631     result_size = sizeof(cl_device_mem_cache_type);
632     result_data.devicememcachetype = CL_NONE;
633     break;
634   case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE:
635     result_size = sizeof(cl_uint);
636     result_data.cluint = 0;
637     break;
638   case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE:
639     result_size = sizeof(cl_ulong);
640     result_data.clulong = 0;
641     break;
642   case CL_DEVICE_GLOBAL_MEM_SIZE:
643     result_size = sizeof(cl_ulong);
644     result_data.clulong = device->globalMemSize;
645     break;
646   case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
647     result_size = sizeof(cl_ulong);
648     result_data.clulong = device->constantMemSize;
649     break;
650   case CL_DEVICE_MAX_CONSTANT_ARGS:
651     result_size = sizeof(cl_uint);
652     result_data.cluint = 1024;
653     break;
654   case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE:
655     result_size = sizeof(size_t);
656     result_data.sizet = 64 * 1024;
657     break;
658   case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE:
659     result_size = sizeof(size_t);
660     result_data.sizet = device->globalMemSize;
661     break;
662   case CL_DEVICE_LOCAL_MEM_TYPE:
663     result_size = sizeof(cl_device_local_mem_type);
664     result_data.devicelocalmemtype = CL_LOCAL;
665     break;
666   case CL_DEVICE_LOCAL_MEM_SIZE:
667     result_size = sizeof(cl_ulong);
668     result_data.clulong = device->localMemSize;
669     break;
670   case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
671     result_size = sizeof(cl_bool);
672     result_data.clbool = CL_FALSE;
673     break;
674   case CL_DEVICE_PROFILING_TIMER_RESOLUTION:
675     result_size = sizeof(size_t);
676     result_data.sizet = 1000;
677     break;
678   case CL_DEVICE_ENDIAN_LITTLE:
679     result_size = sizeof(cl_bool);
680 #if IS_BIG_ENDIAN
681     result_data.clbool = CL_FALSE;
682 #else
683     result_data.clbool = CL_TRUE;
684 #endif
685     break;
686   case CL_DEVICE_AVAILABLE:
687     result_size = sizeof(cl_bool);
688     result_data.clbool = CL_TRUE;
689     break;
690   case CL_DEVICE_COMPILER_AVAILABLE:
691     result_size = sizeof(cl_bool);
692     result_data.clbool = CL_TRUE;
693     break;
694   case CL_DEVICE_EXECUTION_CAPABILITIES:
695     result_size = sizeof(cl_device_exec_capabilities);
696     result_data.cldevexeccap = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL;
697     break;
698   case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES:
699     result_size = sizeof(cl_command_queue_properties);
700     result_data.clcmdqprop =
701       CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE;
702     break;
703   case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES:
704     result_size = sizeof(cl_command_queue_properties);
705     result_data.clcmdqprop = 0;
706     break;
707   case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE:
708     result_size = sizeof(cl_uint);
709     result_data.cluint = 0;
710     break;
711   case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE:
712     result_size = sizeof(cl_uint);
713     result_data.cluint = 0;
714     break;
715   case CL_DEVICE_MAX_ON_DEVICE_QUEUES:
716     result_size = sizeof(cl_uint);
717     result_data.cluint = 0;
718     break;
719   case CL_DEVICE_MAX_ON_DEVICE_EVENTS:
720     result_size = sizeof(cl_uint);
721     result_data.cluint = 0;
722     break;
723   case CL_DEVICE_NAME:
724     result_size = sizeof(DEVICE_NAME);
725     data = DEVICE_NAME;
726     break;
727   case CL_DEVICE_VENDOR:
728     result_size = sizeof(DEVICE_VENDOR);
729     data = DEVICE_VENDOR;
730     break;
731   case CL_DRIVER_VERSION:
732     result_size = sizeof(DRIVER_VERSION);
733     data = DRIVER_VERSION;
734     break;
735   case CL_DEVICE_PROFILE:
736     result_size = sizeof(DEVICE_PROFILE);
737     data = DEVICE_PROFILE;
738     break;
739   case CL_DEVICE_VERSION:
740     result_size = sizeof(DEVICE_VERSION);
741     data = DEVICE_VERSION;
742     break;
743   case CL_DEVICE_EXTENSIONS:
744     result_size = sizeof(extensions);
745     data = extensions;
746     break;
747   case CL_DEVICE_PLATFORM:
748     result_size = sizeof(cl_platform_id);
749     result_data.clplatid = m_platform;
750     break;
751   case CL_DEVICE_DOUBLE_FP_CONFIG:
752     result_size = sizeof(cl_device_fp_config);
753     result_data.devicefpconfig = CL_FP_FMA | CL_FP_ROUND_TO_NEAREST |
754                                  CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF |
755                                  CL_FP_INF_NAN | CL_FP_DENORM;
756     break;
757   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
758     result_size = sizeof(cl_uint);
759     result_data.cluint = 0;
760     break;
761   case CL_DEVICE_HOST_UNIFIED_MEMORY:
762     result_size = sizeof(cl_bool);
763     result_data.clbool = CL_FALSE;
764     break;
765   case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR:
766   case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT:
767   case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT:
768   case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG:
769   case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT:
770   case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
771     result_size = sizeof(cl_uint);
772     result_data.cluint = 1;
773     break;
774   case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
775     result_size = sizeof(cl_uint);
776     result_data.cluint = 0;
777     break;
778   case CL_DEVICE_OPENCL_C_VERSION:
779     result_size = sizeof(DEVICE_LANG_VERSION);
780     data = DEVICE_LANG_VERSION;
781     break;
782   case CL_DEVICE_LINKER_AVAILABLE:
783     result_size = sizeof(cl_bool);
784     result_data.clbool = CL_TRUE;
785     break;
786   case CL_DEVICE_BUILT_IN_KERNELS:
787     result_size = 1;
788     data = "";
789     break;
790   case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE:
791     result_size = sizeof(size_t);
792     result_data.sizet = 65536;
793     break;
794   case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE:
795     result_size = sizeof(size_t);
796     result_data.sizet = 2048;
797     break;
798   case CL_DEVICE_PARENT_DEVICE:
799     result_size = sizeof(cl_device_id);
800     result_data.cldeviceid = NULL;
801     break;
802   case CL_DEVICE_PARTITION_MAX_SUB_DEVICES:
803     result_size = sizeof(cl_uint);
804     result_data.cluint = 0;
805     break;
806   case CL_DEVICE_PARTITION_PROPERTIES:
807   case CL_DEVICE_PARTITION_TYPE:
808     result_size = sizeof(cl_device_partition_property);
809     result_data.cldevpartprop = 0;
810     break;
811   case CL_DEVICE_PARTITION_AFFINITY_DOMAIN:
812     result_size = sizeof(cl_device_affinity_domain);
813     result_data.cldevaffdom = 0;
814     break;
815   case CL_DEVICE_REFERENCE_COUNT:
816     result_size = sizeof(cl_uint);
817     result_data.cluint = 1;
818     break;
819   case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC:
820     result_size = sizeof(cl_bool);
821     result_data.clbool = CL_TRUE;
822     break;
823   case CL_DEVICE_PRINTF_BUFFER_SIZE:
824     result_size = sizeof(size_t);
825     result_data.sizet = 1024;
826     break;
827   case CL_DEVICE_SVM_CAPABILITIES:
828     result_size = sizeof(cl_device_svm_capabilities);
829     result_data.svm = 0;
830     break;
831   case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT:
832     result_size = sizeof(cl_uint);
833     result_data.cluint = 0;
834     break;
835   case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT:
836     result_size = sizeof(cl_uint);
837     result_data.cluint = 0;
838     break;
839   case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT:
840     result_size = sizeof(cl_uint);
841     result_data.cluint = 0;
842     break;
843   case CL_DEVICE_MAX_NUM_SUB_GROUPS:
844     result_size = sizeof(cl_uint);
845     result_data.cluint = 0;
846     break;
847   case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS:
848     result_size = sizeof(cl_bool);
849     result_data.clbool = CL_FALSE;
850     break;
851   case CL_DEVICE_SPIR_VERSIONS:
852     result_size = sizeof(DEVICE_SPIR_VERSIONS);
853     data = DEVICE_SPIR_VERSIONS;
854     break;
855   case CL_DEVICE_NUMERIC_VERSION:
856     result_size = sizeof(cl_version);
857     result_data.cluint = CL_MAKE_VERSION(3, 0, 0);
858     break;
859   case CL_DEVICE_EXTENSIONS_WITH_VERSION:
860     result_size = sizeof(extension_versions);
861     data = extension_versions;
862     break;
863   case CL_DEVICE_IL_VERSION:
864     result_size = 1;
865     data = "";
866     break;
867   case CL_DEVICE_ILS_WITH_VERSION:
868     // TODO: Enable when supported.
869     // result_size = sizeof(il_versions);
870     // data = il_versions;
871     result_size = 0;
872     break;
873   case CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION:
874     // TODO: Enable when supported.
875     // result_size = sizeof(built_in_kernel_versions);
876     // data = built_in_kernel_versions;
877     result_size = 0;
878     break;
879   case CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES:
880     result_size = sizeof(cl_device_atomic_capabilities);
881     result_data.atomiccaps =
882       CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP;
883     break;
884   case CL_DEVICE_ATOMIC_FENCE_CAPABILITIES:
885     result_size = sizeof(cl_device_atomic_capabilities);
886     result_data.atomiccaps = CL_DEVICE_ATOMIC_ORDER_RELAXED |
887                              CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
888                              CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP;
889     break;
890   case CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT:
891     result_size = sizeof(cl_bool);
892     result_data.clbool = CL_TRUE;
893     break;
894   case CL_DEVICE_OPENCL_C_ALL_VERSIONS:
895     result_size = sizeof(opencl_c_all_versions);
896     data = opencl_c_all_versions;
897     break;
898   case CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
899     result_size = sizeof(size_t);
900     result_data.sizet = 1;
901     break;
902   case CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT:
903     result_size = sizeof(cl_bool);
904     result_data.clbool = CL_FALSE;
905     break;
906   case CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT:
907     result_size = sizeof(cl_bool);
908     result_data.clbool = CL_FALSE;
909     break;
910   case CL_DEVICE_OPENCL_C_FEATURES:
911     // TODO: Enable when supported.
912     // result_size = sizeof(opencl_c_features);
913     // data = opencl_c_features;
914     result_size = 0;
915     break;
916   case CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES:
917     result_size = sizeof(cl_device_device_enqueue_capabilities);
918     result_data.devenqcaps = 0;
919     break;
920   case CL_DEVICE_PIPE_SUPPORT:
921     result_size = sizeof(cl_bool);
922     result_data.clbool = CL_FALSE;
923     break;
924   case CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED:
925     result_size = sizeof(DEVICE_CTS_VERSION);
926     data = DEVICE_CTS_VERSION;
927     break;
928   default:
929     ReturnErrorArg(NULL, CL_INVALID_VALUE, param_name);
930   }
931 
932   if (param_value)
933   {
934     // Check destination is large enough
935     if (param_value_size < result_size)
936     {
937       ReturnErrorInfo(NULL, CL_INVALID_VALUE, ParamValueSizeTooSmall);
938     }
939     else
940     {
941       if (data)
942         memcpy(param_value, data, result_size);
943       else
944         memcpy(param_value, &result_data, result_size);
945     }
946   }
947 
948   return CL_SUCCESS;
949 }
950 
clCreateSubDevices(cl_device_id in_device,const cl_device_partition_property * properties,cl_uint num_entries,cl_device_id * out_devices,cl_uint * num_devices)951 CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevices(
952   cl_device_id in_device, const cl_device_partition_property* properties,
953   cl_uint num_entries, cl_device_id* out_devices,
954   cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2
955 {
956   REGISTER_API;
957 
958   ReturnErrorInfo(NULL, CL_INVALID_VALUE, "Not yet implemented");
959 }
960 
clRetainDevice(cl_device_id device)961 CL_API_ENTRY cl_int CL_API_CALL clRetainDevice(cl_device_id device)
962   CL_API_SUFFIX__VERSION_1_2
963 {
964   REGISTER_API;
965 
966   return CL_SUCCESS;
967 }
968 
clReleaseDevice(cl_device_id device)969 CL_API_ENTRY cl_int CL_API_CALL clReleaseDevice(cl_device_id device)
970   CL_API_SUFFIX__VERSION_1_2
971 {
972   REGISTER_API;
973 
974   return CL_SUCCESS;
975 }
976 
clCreateContext(const cl_context_properties * properties,cl_uint num_devices,const cl_device_id * devices,void (CL_CALLBACK * pfn_notify)(const char *,const void *,size_t,void *),void * user_data,cl_int * errcode_ret)977 CL_API_ENTRY cl_context CL_API_CALL clCreateContext(
978   const cl_context_properties* properties, cl_uint num_devices,
979   const cl_device_id* devices,
980   void(CL_CALLBACK* pfn_notify)(const char*, const void*, size_t, void*),
981   void* user_data, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
982 {
983   REGISTER_API;
984 
985   // Check parameters
986   if (num_devices != 1)
987   {
988     SetErrorArg(NULL, CL_INVALID_VALUE, num_devices);
989     return NULL;
990   }
991   if (!devices)
992   {
993     SetErrorArg(NULL, CL_INVALID_VALUE, devices);
994     return NULL;
995   }
996   if (devices[0] != m_device)
997   {
998     SetError(NULL, CL_INVALID_DEVICE);
999     return NULL;
1000   }
1001   if (!pfn_notify && user_data)
1002   {
1003     SetErrorInfo(NULL, CL_INVALID_VALUE,
1004                  "pfn_notify NULL but user_data non-NULL");
1005     return NULL;
1006   }
1007 
1008   // Create context object
1009   cl_context context = new _cl_context;
1010   context->dispatch = m_dispatchTable;
1011   context->context = new oclgrind::Context();
1012   context->notify = pfn_notify;
1013   context->data = user_data;
1014   context->properties = NULL;
1015   context->szProperties = 0;
1016   context->refCount = 1;
1017 
1018   if (properties)
1019   {
1020     int num = 1;
1021     while (properties[num])
1022     {
1023       num++;
1024     }
1025     size_t sz = (num + 1) * sizeof(cl_context_properties);
1026     context->szProperties = sz;
1027     context->properties = (cl_context_properties*)malloc(sz);
1028     memcpy(context->properties, properties, sz);
1029   }
1030 
1031   SetError(NULL, CL_SUCCESS);
1032   return context;
1033 }
1034 
clCreateContextFromType(const cl_context_properties * properties,cl_device_type device_type,void (CL_CALLBACK * pfn_notify)(const char *,const void *,size_t,void *),void * user_data,cl_int * errcode_ret)1035 CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(
1036   const cl_context_properties* properties, cl_device_type device_type,
1037   void(CL_CALLBACK* pfn_notify)(const char*, const void*, size_t, void*),
1038   void* user_data, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
1039 {
1040   REGISTER_API;
1041 
1042   // Check parameters
1043   if (!pfn_notify && user_data)
1044   {
1045     SetErrorInfo(NULL, CL_INVALID_VALUE,
1046                  "pfn_notify NULL but user_data non-NULL");
1047     return NULL;
1048   }
1049   if (!(device_type & DEVICE_TYPE))
1050   {
1051     SetErrorArg(NULL, CL_DEVICE_NOT_FOUND, device_type);
1052     return NULL;
1053   }
1054 
1055   // Create context object
1056   cl_context context = new _cl_context;
1057   context->dispatch = m_dispatchTable;
1058   context->context = new oclgrind::Context();
1059   context->notify = pfn_notify;
1060   context->data = user_data;
1061   context->properties = NULL;
1062   context->szProperties = 0;
1063   context->refCount = 1;
1064 
1065   if (properties)
1066   {
1067     int num = 0;
1068     while (properties[num])
1069     {
1070       num++;
1071     }
1072     size_t sz = (num + 1) * sizeof(cl_context_properties);
1073     context->szProperties = sz;
1074     context->properties = (cl_context_properties*)malloc(sz);
1075     memcpy(context->properties, properties, sz);
1076   }
1077 
1078   SetError(NULL, CL_SUCCESS);
1079   return context;
1080 }
1081 
clRetainContext(cl_context context)1082 CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context context)
1083   CL_API_SUFFIX__VERSION_1_0
1084 {
1085   REGISTER_API;
1086 
1087   if (!context)
1088   {
1089     ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context);
1090   }
1091 
1092   context->refCount++;
1093 
1094   return CL_SUCCESS;
1095 }
1096 
clReleaseContext(cl_context context)1097 CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context)
1098   CL_API_SUFFIX__VERSION_1_0
1099 {
1100   REGISTER_API;
1101 
1102   if (!context)
1103   {
1104     ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context);
1105   }
1106 
1107   if (--context->refCount == 0)
1108   {
1109     if (context->properties)
1110     {
1111       free(context->properties);
1112     }
1113 
1114     while (!context->callbacks.empty())
1115     {
1116       pair<void(CL_CALLBACK*)(cl_context, void*), void*> callback =
1117         context->callbacks.top();
1118       callback.first(context, callback.second);
1119       context->callbacks.pop();
1120     }
1121 
1122     delete context->context;
1123     delete context;
1124   }
1125 
1126   return CL_SUCCESS;
1127 }
1128 
clGetContextInfo(cl_context context,cl_context_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)1129 CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(
1130   cl_context context, cl_context_info param_name, size_t param_value_size,
1131   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
1132 {
1133   REGISTER_API;
1134 
1135   // Check context is valid
1136   if (!context)
1137   {
1138     ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context);
1139   }
1140 
1141   size_t dummy = 0;
1142   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
1143 
1144   union
1145   {
1146     cl_uint cluint;
1147     cl_device_id cldevid;
1148   } result_data;
1149   cl_context_properties* properties = NULL;
1150 
1151   switch (param_name)
1152   {
1153   case CL_CONTEXT_REFERENCE_COUNT:
1154     result_size = sizeof(cl_uint);
1155     result_data.cluint = context->refCount;
1156     break;
1157   case CL_CONTEXT_NUM_DEVICES:
1158     result_size = sizeof(cl_uint);
1159     result_data.cluint = 1;
1160     break;
1161   case CL_CONTEXT_DEVICES:
1162     result_size = sizeof(cl_device_id);
1163     result_data.cldevid = m_device;
1164     break;
1165   case CL_CONTEXT_PROPERTIES:
1166     result_size = context->szProperties;
1167     properties = context->properties;
1168     break;
1169   default:
1170     ReturnErrorArg(context, CL_INVALID_VALUE, param_name);
1171   }
1172 
1173   if (param_value)
1174   {
1175     // Check destination is large enough
1176     if (param_value_size < result_size)
1177     {
1178       ReturnErrorInfo(context, CL_INVALID_VALUE, ParamValueSizeTooSmall);
1179     }
1180     else
1181     {
1182       if (properties)
1183         memcpy(param_value, properties, result_size);
1184       else
1185         memcpy(param_value, &result_data, result_size);
1186     }
1187   }
1188 
1189   return CL_SUCCESS;
1190 }
1191 
1192 CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueue(cl_context context,cl_device_id device,cl_command_queue_properties properties,cl_int * errcode_ret)1193 clCreateCommandQueue(cl_context context, cl_device_id device,
1194                      cl_command_queue_properties properties,
1195                      cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
1196 {
1197   REGISTER_API;
1198 
1199   // Check parameters
1200   if (!context)
1201   {
1202     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
1203     return NULL;
1204   }
1205   if (device != m_device)
1206   {
1207     SetErrorArg(context, CL_INVALID_DEVICE, device);
1208     return NULL;
1209   }
1210 
1211   // Create command-queue object
1212   bool out_of_order = properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
1213   cl_command_queue queue;
1214   queue = new _cl_command_queue;
1215   queue->queue = new oclgrind::Queue(context->context, out_of_order);
1216   queue->dispatch = m_dispatchTable;
1217   queue->properties = properties;
1218   queue->context = context;
1219   queue->refCount = 1;
1220 
1221   clRetainContext(context);
1222 
1223   SetError(context, CL_SUCCESS);
1224   return queue;
1225 }
1226 
clSetCommandQueueProperty(cl_command_queue command_queue,cl_command_queue_properties properties,cl_bool enable,cl_command_queue_properties * old_properties)1227 CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueueProperty(
1228   cl_command_queue command_queue, cl_command_queue_properties properties,
1229   cl_bool enable, cl_command_queue_properties* old_properties)
1230 {
1231   REGISTER_API;
1232 
1233   return CL_SUCCESS;
1234 }
1235 
1236 CL_API_ENTRY cl_int CL_API_CALL
clRetainCommandQueue(cl_command_queue command_queue)1237 clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0
1238 {
1239   REGISTER_API;
1240 
1241   // Check parameters
1242   if (!command_queue)
1243   {
1244     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
1245   }
1246 
1247   command_queue->refCount++;
1248 
1249   return CL_SUCCESS;
1250 }
1251 
1252 CL_API_ENTRY cl_int CL_API_CALL
clReleaseCommandQueue(cl_command_queue command_queue)1253 clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0
1254 {
1255   REGISTER_API;
1256 
1257   if (!command_queue)
1258   {
1259     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
1260   }
1261 
1262   if (--command_queue->refCount == 0)
1263   {
1264     // TODO: Retain/release queue from async thread
1265     // TODO: Spec states that this function performs an implicit flush,
1266     // so maybe we are OK to delete queue here?
1267     clFinish(command_queue);
1268     delete command_queue->queue;
1269     clReleaseContext(command_queue->context);
1270     delete command_queue;
1271   }
1272 
1273   return CL_SUCCESS;
1274 }
1275 
clGetCommandQueueInfo(cl_command_queue command_queue,cl_command_queue_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)1276 CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfo(
1277   cl_command_queue command_queue, cl_command_queue_info param_name,
1278   size_t param_value_size, void* param_value,
1279   size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
1280 {
1281   REGISTER_API;
1282 
1283   // Check queue is valid
1284   if (!command_queue)
1285   {
1286     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
1287   }
1288 
1289   size_t dummy = 0;
1290   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
1291 
1292   union
1293   {
1294     cl_uint cluint;
1295     cl_context context;
1296     cl_device_id cldevid;
1297     cl_command_queue_properties properties;
1298     cl_command_queue queue;
1299   } result_data;
1300   const void* data = nullptr;
1301 
1302   switch (param_name)
1303   {
1304   case CL_QUEUE_CONTEXT:
1305     result_size = sizeof(cl_context);
1306     result_data.context = command_queue->context;
1307     break;
1308   case CL_QUEUE_DEVICE:
1309     result_size = sizeof(cl_device_id);
1310     result_data.cldevid = m_device;
1311     break;
1312   case CL_QUEUE_REFERENCE_COUNT:
1313     result_size = sizeof(cl_uint);
1314     result_data.cluint = command_queue->refCount;
1315     break;
1316   case CL_QUEUE_PROPERTIES:
1317     result_size = sizeof(cl_command_queue_properties);
1318     result_data.properties = command_queue->properties;
1319     break;
1320   case CL_QUEUE_PROPERTIES_ARRAY:
1321     result_size =
1322       command_queue->properties_array.size() * sizeof(cl_queue_properties);
1323     data = command_queue->properties_array.data();
1324     break;
1325   case CL_QUEUE_SIZE:
1326     ReturnErrorArg(command_queue->context, CL_INVALID_COMMAND_QUEUE,
1327                    param_name);
1328   case CL_QUEUE_DEVICE_DEFAULT:
1329     result_size = sizeof(cl_command_queue);
1330     result_data.queue = nullptr;
1331     break;
1332   default:
1333     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, param_name);
1334   }
1335 
1336   if (param_value)
1337   {
1338     // Check destination is large enough
1339     if (param_value_size < result_size)
1340     {
1341       ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
1342                       ParamValueSizeTooSmall);
1343     }
1344     else
1345     {
1346       if (data)
1347         memcpy(param_value, data, result_size);
1348       else
1349         memcpy(param_value, &result_data, result_size);
1350     }
1351   }
1352 
1353   return CL_SUCCESS;
1354 }
1355 
1356 namespace
1357 {
createBuffer(cl_context context,cl_mem_flags flags,size_t size,void * host_ptr,cl_int * errcode_ret)1358 cl_mem createBuffer(cl_context context, cl_mem_flags flags, size_t size,
1359                     void* host_ptr, cl_int* errcode_ret)
1360 {
1361   // Check parameters
1362   if (!context)
1363   {
1364     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
1365     return NULL;
1366   }
1367   if (size == 0)
1368   {
1369     SetErrorArg(context, CL_INVALID_BUFFER_SIZE, size);
1370     return NULL;
1371   }
1372   if ((host_ptr == NULL) ==
1373       ((flags & CL_MEM_COPY_HOST_PTR) || flags & CL_MEM_USE_HOST_PTR))
1374   {
1375     SetErrorInfo(context, CL_INVALID_HOST_PTR,
1376                  "host_ptr NULL but CL_MEM_{COPY,USE}_HOST_PTR used");
1377     return NULL;
1378   }
1379   if ((flags & CL_MEM_USE_HOST_PTR) &&
1380       (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
1381   {
1382     SetErrorInfo(context, CL_INVALID_VALUE,
1383                  "CL_MEM_USE_HOST_PTR cannot be used with "
1384                  "CL_MEM_{COPY,ALLOC}_HOST_PTR");
1385     return NULL;
1386   }
1387 
1388   // Create memory object
1389   oclgrind::Memory* globalMemory = context->context->getGlobalMemory();
1390   cl_mem mem = new _cl_mem;
1391   mem->dispatch = m_dispatchTable;
1392   mem->context = context;
1393   mem->parent = NULL;
1394   mem->size = size;
1395   mem->offset = 0;
1396   mem->flags = flags;
1397   mem->isImage = false;
1398   mem->refCount = 1;
1399   if (flags & CL_MEM_USE_HOST_PTR)
1400   {
1401     mem->address = globalMemory->createHostBuffer(size, host_ptr, flags);
1402     mem->hostPtr = host_ptr;
1403   }
1404   else
1405   {
1406     mem->address = globalMemory->allocateBuffer(size, flags);
1407     mem->hostPtr = NULL;
1408   }
1409   if (!mem->address)
1410   {
1411     SetError(context, CL_MEM_OBJECT_ALLOCATION_FAILURE);
1412     delete mem;
1413     return NULL;
1414   }
1415   clRetainContext(context);
1416 
1417   if (flags & CL_MEM_COPY_HOST_PTR)
1418   {
1419     context->context->getGlobalMemory()->store((const unsigned char*)host_ptr,
1420                                                mem->address, size);
1421   }
1422 
1423   SetError(context, CL_SUCCESS);
1424   return mem;
1425 }
1426 } // namespace
1427 
1428 CL_API_ENTRY cl_mem CL_API_CALL
clCreateBuffer(cl_context context,cl_mem_flags flags,size_t size,void * host_ptr,cl_int * errcode_ret)1429 clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size,
1430                void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
1431 {
1432   REGISTER_API;
1433 
1434   return createBuffer(context, flags, size, host_ptr, errcode_ret);
1435 }
1436 
clCreateBufferWithProperties(cl_context context,const cl_mem_properties * properties,cl_mem_flags flags,size_t size,void * host_ptr,cl_int * errcode_ret)1437 CL_API_ENTRY cl_mem CL_API_CALL clCreateBufferWithProperties(
1438   cl_context context, const cl_mem_properties* properties, cl_mem_flags flags,
1439   size_t size, void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_3_0
1440 {
1441   REGISTER_API;
1442 
1443   // Check properties (none are supported)
1444   if (properties && properties[0] != 0)
1445   {
1446     SetErrorInfo(context, CL_INVALID_PROPERTY, "Unsupported property");
1447   }
1448 
1449   cl_mem buffer = createBuffer(context, flags, size, host_ptr, errcode_ret);
1450   if (buffer && properties)
1451   {
1452     buffer->properties.assign(properties, properties + 1);
1453   }
1454 
1455   return buffer;
1456 }
1457 
clCreateSubBuffer(cl_mem buffer,cl_mem_flags flags,cl_buffer_create_type buffer_create_type,const void * buffer_create_info,cl_int * errcode_ret)1458 CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBuffer(
1459   cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type buffer_create_type,
1460   const void* buffer_create_info,
1461   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1
1462 {
1463   REGISTER_API;
1464 
1465   // Check parameters
1466   if (!buffer)
1467   {
1468     SetErrorArg(NULL, CL_INVALID_MEM_OBJECT, buffer);
1469     return NULL;
1470   }
1471   if (buffer->parent)
1472   {
1473     SetErrorInfo(buffer->context, CL_INVALID_MEM_OBJECT,
1474                  "Parent buffer cannot be a sub-buffer");
1475     return NULL;
1476   }
1477   if (buffer_create_type != CL_BUFFER_CREATE_TYPE_REGION)
1478   {
1479     SetErrorArg(buffer->context, CL_INVALID_VALUE, buffer_create_type);
1480     return NULL;
1481   }
1482   if (!buffer_create_info)
1483   {
1484     SetErrorArg(buffer->context, CL_INVALID_VALUE, buffer_create_info);
1485     return NULL;
1486   }
1487 
1488   _cl_buffer_region region = *(_cl_buffer_region*)buffer_create_info;
1489   if (region.origin + region.size > buffer->size)
1490   {
1491     SetErrorInfo(buffer->context, CL_INVALID_VALUE,
1492                  "Region doesn't fit inside parent buffer");
1493     return NULL;
1494   }
1495   if (region.size == 0)
1496   {
1497     SetErrorInfo(buffer->context, CL_INVALID_VALUE, "Region size cannot be 0");
1498     return NULL;
1499   }
1500 
1501   // Inherit flags from parent where appropriate
1502   cl_mem_flags memFlags = 0;
1503   cl_mem_flags rwFlags =
1504     CL_MEM_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY;
1505   cl_mem_flags hostAccess =
1506     CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY;
1507   cl_mem_flags hostPtr =
1508     CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
1509   if ((flags & rwFlags) == 0)
1510   {
1511     memFlags |= buffer->flags & rwFlags;
1512   }
1513   else
1514   {
1515     memFlags |= flags & rwFlags;
1516   }
1517   if ((flags & hostAccess) == 0)
1518   {
1519     memFlags |= buffer->flags & hostAccess;
1520   }
1521   else
1522   {
1523     memFlags |= flags & hostAccess;
1524   }
1525   memFlags |= buffer->flags & hostPtr;
1526 
1527   // Create memory object
1528   cl_mem mem = new _cl_mem;
1529   mem->dispatch = m_dispatchTable;
1530   mem->context = buffer->context;
1531   mem->parent = buffer;
1532   mem->size = region.size;
1533   mem->offset = region.origin;
1534   mem->isImage = false;
1535   mem->flags = memFlags;
1536   mem->hostPtr = (unsigned char*)buffer->hostPtr + region.origin;
1537   mem->refCount = 1;
1538   mem->address = buffer->address + region.origin;
1539   clRetainMemObject(buffer);
1540 
1541   SetError(buffer->context, CL_SUCCESS);
1542   return mem;
1543 }
1544 
1545 namespace
1546 {
1547 
1548 // Utility function for getting number of dimensions in image
getNumDimensions(cl_mem_object_type type)1549 size_t getNumDimensions(cl_mem_object_type type)
1550 {
1551   switch (type)
1552   {
1553   case CL_MEM_OBJECT_IMAGE1D:
1554   case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1555   case CL_MEM_OBJECT_IMAGE1D_BUFFER:
1556     return 1;
1557   case CL_MEM_OBJECT_IMAGE2D:
1558   case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1559     return 2;
1560   case CL_MEM_OBJECT_IMAGE3D:
1561     return 3;
1562   default:
1563     return 0;
1564   }
1565 }
1566 
1567 // Utility function for getting number of channels in an image
getNumChannels(const cl_image_format * format)1568 size_t getNumChannels(const cl_image_format* format)
1569 {
1570   switch (format->image_channel_order)
1571   {
1572   case CL_R:
1573   case CL_Rx:
1574   case CL_A:
1575   case CL_INTENSITY:
1576   case CL_LUMINANCE:
1577     return 1;
1578   case CL_RG:
1579   case CL_RGx:
1580   case CL_RA:
1581     return 2;
1582   case CL_RGB:
1583   case CL_RGBx:
1584     return 3;
1585   case CL_RGBA:
1586   case CL_ARGB:
1587   case CL_BGRA:
1588     return 4;
1589   default:
1590     return 0;
1591   }
1592 }
1593 
1594 // Utility function for computing an image format's pixel size (in bytes)
getPixelSize(const cl_image_format * format)1595 size_t getPixelSize(const cl_image_format* format)
1596 {
1597   // Get number of channels
1598   size_t numChannels = getNumChannels(format);
1599 
1600   // Get size of each pixel (in bytes)
1601   switch (format->image_channel_data_type)
1602   {
1603   case CL_SNORM_INT8:
1604   case CL_UNORM_INT8:
1605   case CL_SIGNED_INT8:
1606   case CL_UNSIGNED_INT8:
1607     return numChannels;
1608   case CL_SNORM_INT16:
1609   case CL_UNORM_INT16:
1610   case CL_SIGNED_INT16:
1611   case CL_UNSIGNED_INT16:
1612   case CL_HALF_FLOAT:
1613     return 2 * numChannels;
1614   case CL_SIGNED_INT32:
1615   case CL_UNSIGNED_INT32:
1616   case CL_FLOAT:
1617     return 4 * numChannels;
1618   case CL_UNORM_SHORT_565:
1619   case CL_UNORM_SHORT_555:
1620     return 2;
1621   case CL_UNORM_INT_101010:
1622     return 4;
1623   default:
1624     return 0;
1625   }
1626 }
1627 
isImageArray(cl_mem_object_type type)1628 bool isImageArray(cl_mem_object_type type)
1629 {
1630   if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
1631       type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
1632   {
1633     return true;
1634   }
1635   return false;
1636 }
1637 
createImage(cl_context context,cl_mem_flags flags,const cl_image_format * image_format,const cl_image_desc * image_desc,void * host_ptr,cl_int * errcode_ret)1638 cl_mem createImage(cl_context context, cl_mem_flags flags,
1639                    const cl_image_format* image_format,
1640                    const cl_image_desc* image_desc, void* host_ptr,
1641                    cl_int* errcode_ret)
1642 {
1643   // Check parameters
1644   if (!context)
1645   {
1646     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
1647     return NULL;
1648   }
1649   if (!image_format)
1650   {
1651     SetErrorArg(context, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, image_format);
1652     return NULL;
1653   }
1654   if (!image_desc)
1655   {
1656     SetErrorArg(context, CL_INVALID_IMAGE_DESCRIPTOR, image_desc);
1657     return NULL;
1658   }
1659 
1660   // Get size of each pixel (in bytes)
1661   size_t pixelSize = getPixelSize(image_format);
1662   if (!pixelSize)
1663   {
1664     SetErrorArg(context, CL_INVALID_VALUE, image_format);
1665     return NULL;
1666   }
1667 
1668   // Get image dimensions
1669   size_t dims = getNumDimensions(image_desc->image_type);
1670   size_t width = image_desc->image_width;
1671   size_t height = 1, depth = 1;
1672   size_t arraySize = 1;
1673   if (dims > 1)
1674   {
1675     height = image_desc->image_height;
1676   }
1677   if (dims > 2)
1678   {
1679     depth = image_desc->image_depth;
1680   }
1681   if (isImageArray(image_desc->image_type))
1682   {
1683     arraySize = image_desc->image_array_size;
1684   }
1685 
1686   // Calculate total size of image
1687   size_t size = width * height * depth * arraySize * pixelSize;
1688 
1689   cl_mem mem;
1690 
1691   if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
1692   {
1693     // Use existing buffer
1694     if (!image_desc->buffer)
1695     {
1696       SetErrorInfo(context, CL_INVALID_VALUE,
1697                    "image_desc->buffer cannot be NULL "
1698                    "when using CL_MEM_OBJECT_IMAGE1D_BUFFER");
1699       return NULL;
1700     }
1701     mem = image_desc->buffer;
1702     clRetainMemObject(image_desc->buffer);
1703   }
1704   else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE2D &&
1705            image_desc->mem_object)
1706   {
1707     SetErrorInfo(context, CL_INVALID_OPERATION,
1708                  "Creating 2D images from buffers is not supported");
1709     return nullptr;
1710   }
1711   else
1712   {
1713     // Create buffer
1714     // TODO: Use pitches
1715     mem = createBuffer(context, flags, size, host_ptr, errcode_ret);
1716     if (!mem)
1717     {
1718       return NULL;
1719     }
1720   }
1721 
1722   // Create image object wrapper
1723   cl_image* image = new cl_image;
1724   *(cl_mem)image = *mem;
1725   image->isImage = true;
1726   image->format = *image_format;
1727   image->desc = *image_desc;
1728   image->desc.image_width = width;
1729   image->desc.image_height = height;
1730   image->desc.image_depth = depth;
1731   image->desc.image_array_size = arraySize;
1732   image->refCount = 1;
1733   if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
1734   {
1735     delete mem;
1736   }
1737 
1738   SetError(context, CL_SUCCESS);
1739   return image;
1740 }
1741 } // namespace
1742 
clCreateImage(cl_context context,cl_mem_flags flags,const cl_image_format * image_format,const cl_image_desc * image_desc,void * host_ptr,cl_int * errcode_ret)1743 CL_API_ENTRY cl_mem CL_API_CALL clCreateImage(
1744   cl_context context, cl_mem_flags flags, const cl_image_format* image_format,
1745   const cl_image_desc* image_desc, void* host_ptr,
1746   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2
1747 {
1748   REGISTER_API;
1749 
1750   return createImage(context, flags, image_format, image_desc, host_ptr,
1751                      errcode_ret);
1752 }
1753 
clCreateImageWithProperties(cl_context context,const cl_mem_properties * properties,cl_mem_flags flags,const cl_image_format * image_format,const cl_image_desc * image_desc,void * host_ptr,cl_int * errcode_ret)1754 CL_API_ENTRY cl_mem CL_API_CALL clCreateImageWithProperties(
1755   cl_context context, const cl_mem_properties* properties, cl_mem_flags flags,
1756   const cl_image_format* image_format, const cl_image_desc* image_desc,
1757   void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_3_0
1758 {
1759   REGISTER_API;
1760 
1761   // Check properties (none are supported)
1762   if (properties && properties[0] != 0)
1763   {
1764     SetErrorInfo(context, CL_INVALID_PROPERTY, "Unsupported property");
1765   }
1766 
1767   cl_mem image = createImage(context, flags, image_format, image_desc, host_ptr,
1768                              errcode_ret);
1769   if (image && properties)
1770   {
1771     image->properties.assign(properties, properties + 1);
1772   }
1773 
1774   return image;
1775 }
1776 
clCreateImage2D(cl_context context,cl_mem_flags flags,const cl_image_format * image_format,size_t image_width,size_t image_height,size_t image_row_pitch,void * host_ptr,cl_int * errcode_ret)1777 CL_API_ENTRY cl_mem CL_API_CALL clCreateImage2D(
1778   cl_context context, cl_mem_flags flags, const cl_image_format* image_format,
1779   size_t image_width, size_t image_height, size_t image_row_pitch,
1780   void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
1781 {
1782   REGISTER_API;
1783 
1784   cl_image_desc desc = {CL_MEM_OBJECT_IMAGE2D,
1785                         image_width,
1786                         image_height,
1787                         1,
1788                         1,
1789                         image_row_pitch,
1790                         0,
1791                         0,
1792                         0,
1793                         {NULL}};
1794   return createImage(context, flags, image_format, &desc, host_ptr,
1795                      errcode_ret);
1796 }
1797 
clCreateImage3D(cl_context context,cl_mem_flags flags,const cl_image_format * image_format,size_t image_width,size_t image_height,size_t image_depth,size_t image_row_pitch,size_t image_slice_pitch,void * host_ptr,cl_int * errcode_ret)1798 CL_API_ENTRY cl_mem CL_API_CALL clCreateImage3D(
1799   cl_context context, cl_mem_flags flags, const cl_image_format* image_format,
1800   size_t image_width, size_t image_height, size_t image_depth,
1801   size_t image_row_pitch, size_t image_slice_pitch, void* host_ptr,
1802   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
1803 {
1804   REGISTER_API;
1805 
1806   cl_image_desc desc = {CL_MEM_OBJECT_IMAGE3D,
1807                         image_width,
1808                         image_height,
1809                         image_depth,
1810                         1,
1811                         image_row_pitch,
1812                         image_slice_pitch,
1813                         0,
1814                         0,
1815                         {NULL}};
1816   return createImage(context, flags, image_format, &desc, host_ptr,
1817                      errcode_ret);
1818 }
1819 
clRetainMemObject(cl_mem memobj)1820 CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem memobj)
1821   CL_API_SUFFIX__VERSION_1_0
1822 {
1823   REGISTER_API;
1824 
1825   if (!memobj)
1826   {
1827     ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj);
1828   }
1829 
1830   memobj->refCount++;
1831   return CL_SUCCESS;
1832 }
1833 
clReleaseMemObject(cl_mem memobj)1834 CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem memobj)
1835   CL_API_SUFFIX__VERSION_1_0
1836 {
1837   REGISTER_API;
1838 
1839   if (!memobj)
1840   {
1841     ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj);
1842   }
1843 
1844   if (--memobj->refCount == 0)
1845   {
1846     if (memobj->isImage &&
1847         ((cl_image*)memobj)->desc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
1848     {
1849       clReleaseMemObject(((cl_image*)memobj)->desc.buffer);
1850     }
1851     else
1852     {
1853       if (memobj->parent)
1854       {
1855         clReleaseMemObject(memobj->parent);
1856       }
1857       else
1858       {
1859         memobj->context->context->getGlobalMemory()->deallocateBuffer(
1860           memobj->address);
1861         clReleaseContext(memobj->context);
1862       }
1863 
1864       while (!memobj->callbacks.empty())
1865       {
1866         pair<void(CL_CALLBACK*)(cl_mem, void*), void*> callback =
1867           memobj->callbacks.top();
1868         callback.first(memobj, callback.second);
1869         memobj->callbacks.pop();
1870       }
1871     }
1872 
1873     delete memobj;
1874   }
1875 
1876   return CL_SUCCESS;
1877 }
1878 
clGetSupportedImageFormats(cl_context context,cl_mem_flags flags,cl_mem_object_type image_type,cl_uint num_entries,cl_image_format * image_formats,cl_uint * num_image_formats)1879 CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormats(
1880   cl_context context, cl_mem_flags flags, cl_mem_object_type image_type,
1881   cl_uint num_entries, cl_image_format* image_formats,
1882   cl_uint* num_image_formats) CL_API_SUFFIX__VERSION_1_0
1883 {
1884   REGISTER_API;
1885 
1886   // Check parameters
1887   if (!context)
1888   {
1889     ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context);
1890   }
1891   if (num_entries == 0 && image_formats)
1892   {
1893     ReturnErrorInfo(context, CL_INVALID_VALUE,
1894                     "num_entries should be >0 if image_formats non-NULL");
1895   }
1896 
1897   // TODO: Add support for packed image types
1898 
1899   // Channel orders
1900   const cl_channel_order ordersAll[] = {
1901     CL_R, CL_Rx, CL_A, CL_RG, CL_RGx, CL_RA, CL_RGBA,
1902   };
1903   const cl_channel_order ordersNormalized[] = {CL_INTENSITY, CL_LUMINANCE};
1904   const cl_channel_order ordersByte[] = {CL_ARGB, CL_BGRA};
1905   const cl_channel_order ordersPacked[] = {CL_RGB, CL_RGBx};
1906   const cl_channel_order* orders[] = {
1907     ordersAll, ordersNormalized, ordersByte //, ordersPacked
1908   };
1909   const size_t numOrders[] = {
1910     sizeof(ordersAll) / sizeof(cl_channel_order),
1911     sizeof(ordersNormalized) / sizeof(cl_channel_order),
1912     sizeof(ordersByte) / sizeof(cl_channel_order),
1913     sizeof(ordersPacked) / sizeof(cl_channel_order),
1914   };
1915 
1916   // Channel types
1917   const cl_channel_type typesAll[] = {
1918     CL_SNORM_INT8,     CL_SNORM_INT16,    CL_UNORM_INT8,   CL_UNORM_INT16,
1919     CL_SIGNED_INT8,    CL_SIGNED_INT16,   CL_SIGNED_INT32, CL_UNSIGNED_INT8,
1920     CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_FLOAT,        CL_HALF_FLOAT,
1921   };
1922   const cl_channel_type typesNormalized[] = {
1923     CL_SNORM_INT8,  CL_SNORM_INT16, CL_UNORM_INT8,
1924     CL_UNORM_INT16, CL_FLOAT,       CL_HALF_FLOAT,
1925   };
1926   const cl_channel_type typesByte[] = {
1927     CL_SNORM_INT8,
1928     CL_UNORM_INT8,
1929     CL_SIGNED_INT8,
1930     CL_UNSIGNED_INT8,
1931   };
1932   const cl_channel_type typesPacked[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555,
1933                                          CL_UNORM_INT_101010};
1934   const cl_channel_type* types[] = {
1935     typesAll, typesNormalized, typesByte //, typesPacked,
1936   };
1937   const size_t numTypes[] = {
1938     sizeof(typesAll) / sizeof(cl_channel_order),
1939     sizeof(typesNormalized) / sizeof(cl_channel_order),
1940     sizeof(typesByte) / sizeof(cl_channel_order),
1941     sizeof(typesPacked) / sizeof(cl_channel_order),
1942   };
1943 
1944   // Calculate total number of formats
1945   size_t numCatagories = sizeof(orders) / sizeof(cl_channel_order*);
1946   size_t numFormats = 0;
1947   for (size_t c = 0; c < numCatagories; c++)
1948   {
1949     numFormats += numOrders[c] * numTypes[c];
1950   }
1951   if (num_image_formats)
1952   {
1953     *num_image_formats = numFormats;
1954   }
1955 
1956   // Generate list of all valid order/type combinations
1957   if (image_formats)
1958   {
1959     unsigned i = 0;
1960     for (size_t c = 0; c < numCatagories; c++)
1961     {
1962       for (size_t o = 0; o < numOrders[c]; o++)
1963       {
1964         for (size_t t = 0; t < numTypes[c]; t++)
1965         {
1966           if (i >= num_entries)
1967           {
1968             return CL_SUCCESS;
1969           }
1970 
1971           cl_image_format format = {orders[c][o], types[c][t]};
1972           image_formats[i++] = format;
1973         }
1974       }
1975     }
1976   }
1977 
1978   return CL_SUCCESS;
1979 }
1980 
clGetMemObjectInfo(cl_mem memobj,cl_mem_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)1981 CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfo(
1982   cl_mem memobj, cl_mem_info param_name, size_t param_value_size,
1983   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
1984 {
1985   REGISTER_API;
1986 
1987   // Check mem object is valid
1988   if (!memobj)
1989   {
1990     ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj);
1991   }
1992 
1993   size_t dummy = 0;
1994   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
1995   union
1996   {
1997     cl_mem_object_type clmemobjty;
1998     cl_mem_flags clmemflags;
1999     cl_context context;
2000     cl_mem clmem;
2001     size_t sizet;
2002     cl_uint cluint;
2003     cl_bool clbool;
2004     void* ptr;
2005   } result_data;
2006   const void* data = nullptr;
2007 
2008   switch (param_name)
2009   {
2010   case CL_MEM_TYPE:
2011     result_size = sizeof(cl_mem_object_type);
2012     result_data.clmemobjty = memobj->isImage
2013                                ? ((cl_image*)memobj)->desc.image_type
2014                                : CL_MEM_OBJECT_BUFFER;
2015     break;
2016   case CL_MEM_FLAGS:
2017     result_size = sizeof(cl_mem_flags);
2018     result_data.clmemflags = memobj->flags;
2019     break;
2020   case CL_MEM_SIZE:
2021     result_size = sizeof(size_t);
2022     result_data.sizet = memobj->size;
2023     break;
2024   case CL_MEM_HOST_PTR:
2025     result_size = sizeof(void*);
2026     result_data.ptr = memobj->hostPtr;
2027     break;
2028   case CL_MEM_MAP_COUNT:
2029     result_size = sizeof(cl_uint);
2030     result_data.cluint = 0;
2031     break;
2032   case CL_MEM_REFERENCE_COUNT:
2033     result_size = sizeof(cl_uint);
2034     result_data.cluint = memobj->refCount;
2035     break;
2036   case CL_MEM_CONTEXT:
2037     result_size = sizeof(cl_context);
2038     result_data.context = memobj->context;
2039     break;
2040   case CL_MEM_ASSOCIATED_MEMOBJECT:
2041     result_size = sizeof(cl_mem);
2042     if (memobj->isImage)
2043     {
2044       result_data.clmem = static_cast<cl_image*>(memobj)->desc.mem_object;
2045     }
2046     else
2047     {
2048       result_data.clmem = memobj->parent;
2049     }
2050     break;
2051   case CL_MEM_OFFSET:
2052     result_size = sizeof(size_t);
2053     result_data.sizet = memobj->offset;
2054     break;
2055   case CL_MEM_USES_SVM_POINTER:
2056     result_size = sizeof(cl_bool);
2057     result_data.clbool = CL_FALSE;
2058     break;
2059   case CL_MEM_PROPERTIES:
2060     result_size = memobj->properties.size() * sizeof(cl_mem_properties);
2061     data = memobj->properties.data();
2062     break;
2063   default:
2064     ReturnErrorArg(memobj->context, CL_INVALID_VALUE, param_name);
2065   }
2066 
2067   if (param_value)
2068   {
2069     // Check destination is large enough
2070     if (param_value_size < result_size)
2071     {
2072       ReturnErrorInfo(memobj->context, CL_INVALID_VALUE,
2073                       ParamValueSizeTooSmall);
2074     }
2075     else
2076     {
2077       if (data)
2078         memcpy(param_value, data, result_size);
2079       else
2080         memcpy(param_value, &result_data, result_size);
2081     }
2082   }
2083 
2084   return CL_SUCCESS;
2085 }
2086 
clGetImageInfo(cl_mem image,cl_image_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)2087 CL_API_ENTRY cl_int CL_API_CALL clGetImageInfo(
2088   cl_mem image, cl_image_info param_name, size_t param_value_size,
2089   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
2090 {
2091   REGISTER_API;
2092 
2093   // Check mem object is valid
2094   if (!image)
2095   {
2096     ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, image);
2097   }
2098   cl_image* img = (cl_image*)image;
2099 
2100   size_t dummy = 0;
2101   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
2102   union
2103   {
2104     cl_image_format climgfmt;
2105     size_t sizet;
2106     cl_mem clmem;
2107     cl_uint cluint;
2108   } result_data;
2109 
2110   switch (param_name)
2111   {
2112   case CL_IMAGE_FORMAT:
2113     result_size = sizeof(cl_image_format);
2114     result_data.climgfmt = img->format;
2115     break;
2116   case CL_IMAGE_ELEMENT_SIZE:
2117     result_size = sizeof(size_t);
2118     result_data.sizet = getPixelSize(&img->format);
2119     break;
2120   case CL_IMAGE_ROW_PITCH:
2121     result_size = sizeof(size_t);
2122     result_data.sizet = img->desc.image_row_pitch;
2123     break;
2124   case CL_IMAGE_SLICE_PITCH:
2125     result_size = sizeof(size_t);
2126     result_data.sizet = img->desc.image_slice_pitch;
2127     break;
2128   case CL_IMAGE_WIDTH:
2129     result_size = sizeof(size_t);
2130     result_data.sizet = img->desc.image_width;
2131     break;
2132   case CL_IMAGE_HEIGHT:
2133     result_size = sizeof(size_t);
2134     result_data.sizet =
2135       getNumDimensions(img->desc.image_type) > 1 ? img->desc.image_height : 0;
2136     break;
2137   case CL_IMAGE_DEPTH:
2138     result_size = sizeof(size_t);
2139     result_data.sizet =
2140       getNumDimensions(img->desc.image_type) > 2 ? img->desc.image_depth : 0;
2141     break;
2142   case CL_IMAGE_ARRAY_SIZE:
2143     result_size = sizeof(size_t);
2144     result_data.sizet =
2145       isImageArray(img->desc.image_type) ? img->desc.image_array_size : 0;
2146     break;
2147   case CL_IMAGE_BUFFER:
2148     result_size = sizeof(cl_mem);
2149     result_data.clmem = img->desc.buffer;
2150     break;
2151   case CL_IMAGE_NUM_MIP_LEVELS:
2152     result_size = sizeof(cl_uint);
2153     result_data.cluint = 0;
2154     break;
2155   case CL_IMAGE_NUM_SAMPLES:
2156     result_size = sizeof(cl_uint);
2157     result_data.cluint = 0;
2158     break;
2159   default:
2160     ReturnErrorArg(image->context, CL_INVALID_VALUE, param_name);
2161   }
2162 
2163   if (param_value)
2164   {
2165     // Check destination is large enough
2166     if (param_value_size < result_size)
2167     {
2168       ReturnErrorInfo(image->context, CL_INVALID_VALUE, ParamValueSizeTooSmall);
2169     }
2170     else
2171     {
2172       memcpy(param_value, &result_data, result_size);
2173     }
2174   }
2175 
2176   return CL_SUCCESS;
2177 }
2178 
clSetMemObjectDestructorCallback(cl_mem memobj,void (CL_CALLBACK * pfn_notify)(cl_mem,void *),void * user_data)2179 CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorCallback(
2180   cl_mem memobj, void(CL_CALLBACK* pfn_notify)(cl_mem, void*),
2181   void* user_data) CL_API_SUFFIX__VERSION_1_1
2182 {
2183   REGISTER_API;
2184 
2185   // Check parameters
2186   if (!memobj)
2187   {
2188     ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj);
2189   }
2190   if (!pfn_notify)
2191   {
2192     ReturnErrorArg(memobj->context, CL_INVALID_VALUE, pfn_notify);
2193   }
2194 
2195   memobj->callbacks.push(make_pair(pfn_notify, user_data));
2196 
2197   return CL_SUCCESS;
2198 }
2199 
2200 CL_API_ENTRY cl_sampler CL_API_CALL
clCreateSampler(cl_context context,cl_bool normalized_coords,cl_addressing_mode addressing_mode,cl_filter_mode filter_mode,cl_int * errcode_ret)2201 clCreateSampler(cl_context context, cl_bool normalized_coords,
2202                 cl_addressing_mode addressing_mode, cl_filter_mode filter_mode,
2203                 cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
2204 {
2205   REGISTER_API;
2206 
2207   // Check parameters
2208   if (!context)
2209   {
2210     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
2211     return NULL;
2212   }
2213 
2214   // Create sampler bitfield
2215   uint32_t bitfield = 0;
2216 
2217   if (normalized_coords)
2218   {
2219     bitfield |= CLK_NORMALIZED_COORDS_TRUE;
2220   }
2221 
2222   switch (addressing_mode)
2223   {
2224   case CL_ADDRESS_NONE:
2225     break;
2226   case CL_ADDRESS_CLAMP_TO_EDGE:
2227     bitfield |= CLK_ADDRESS_CLAMP_TO_EDGE;
2228     break;
2229   case CL_ADDRESS_CLAMP:
2230     bitfield |= CLK_ADDRESS_CLAMP;
2231     break;
2232   case CL_ADDRESS_REPEAT:
2233     bitfield |= CLK_ADDRESS_REPEAT;
2234     break;
2235   case CL_ADDRESS_MIRRORED_REPEAT:
2236     bitfield |= CLK_ADDRESS_MIRRORED_REPEAT;
2237     break;
2238   default:
2239     SetErrorArg(context, CL_INVALID_VALUE, addressing_mode);
2240     return NULL;
2241   }
2242 
2243   switch (filter_mode)
2244   {
2245   case CL_FILTER_NEAREST:
2246     bitfield |= CLK_FILTER_NEAREST;
2247     break;
2248   case CL_FILTER_LINEAR:
2249     bitfield |= CLK_FILTER_LINEAR;
2250     break;
2251   default:
2252     SetErrorArg(context, CL_INVALID_VALUE, filter_mode);
2253     return NULL;
2254   }
2255 
2256   // Create sampler
2257   cl_sampler sampler = new _cl_sampler;
2258   sampler->dispatch = m_dispatchTable;
2259   sampler->context = context;
2260   sampler->normCoords = normalized_coords;
2261   sampler->addressMode = addressing_mode;
2262   sampler->filterMode = filter_mode;
2263   sampler->sampler = bitfield;
2264   sampler->refCount = 1;
2265 
2266   SetError(context, CL_SUCCESS);
2267   return sampler;
2268 }
2269 
clRetainSampler(cl_sampler sampler)2270 CL_API_ENTRY cl_int CL_API_CALL clRetainSampler(cl_sampler sampler)
2271   CL_API_SUFFIX__VERSION_1_0
2272 {
2273   REGISTER_API;
2274 
2275   if (!sampler)
2276   {
2277     ReturnErrorArg(NULL, CL_INVALID_SAMPLER, sampler);
2278   }
2279 
2280   sampler->refCount++;
2281 
2282   return CL_SUCCESS;
2283 }
2284 
clReleaseSampler(cl_sampler sampler)2285 CL_API_ENTRY cl_int CL_API_CALL clReleaseSampler(cl_sampler sampler)
2286   CL_API_SUFFIX__VERSION_1_0
2287 {
2288   REGISTER_API;
2289 
2290   if (!sampler)
2291   {
2292     ReturnErrorArg(NULL, CL_INVALID_SAMPLER, sampler);
2293   }
2294 
2295   if (--sampler->refCount == 0)
2296   {
2297     delete sampler;
2298   }
2299 
2300   return CL_SUCCESS;
2301 }
2302 
clGetSamplerInfo(cl_sampler sampler,cl_sampler_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)2303 CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfo(
2304   cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size,
2305   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
2306 {
2307   REGISTER_API;
2308 
2309   // Check sampler is valid
2310   if (!sampler)
2311   {
2312     ReturnErrorArg(NULL, CL_INVALID_SAMPLER, sampler);
2313   }
2314 
2315   size_t dummy = 0;
2316   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
2317   union
2318   {
2319     cl_uint cluint;
2320     cl_context clcontext;
2321     cl_bool clbool;
2322     cl_addressing_mode claddrmode;
2323     cl_filter_mode clfiltmode;
2324   } result_data;
2325   const void* data = nullptr;
2326 
2327   switch (param_name)
2328   {
2329   case CL_SAMPLER_REFERENCE_COUNT:
2330     result_size = sizeof(cl_uint);
2331     result_data.cluint = sampler->refCount;
2332     break;
2333   case CL_SAMPLER_CONTEXT:
2334     result_size = sizeof(cl_context);
2335     result_data.clcontext = sampler->context;
2336     break;
2337   case CL_SAMPLER_NORMALIZED_COORDS:
2338     result_size = sizeof(cl_bool);
2339     result_data.clbool = sampler->normCoords;
2340     break;
2341   case CL_SAMPLER_ADDRESSING_MODE:
2342     result_size = sizeof(cl_addressing_mode);
2343     result_data.claddrmode = sampler->addressMode;
2344     break;
2345   case CL_SAMPLER_FILTER_MODE:
2346     result_size = sizeof(cl_filter_mode);
2347     result_data.clfiltmode = sampler->filterMode;
2348     break;
2349   case CL_SAMPLER_PROPERTIES:
2350     result_size = sampler->properties.size() * sizeof(cl_sampler_properties);
2351     data = sampler->properties.data();
2352     break;
2353   default:
2354     ReturnErrorArg(sampler->context, CL_INVALID_VALUE, param_name);
2355   }
2356 
2357   if (param_value)
2358   {
2359     // Check destination is large enough
2360     if (param_value_size < result_size)
2361     {
2362       ReturnErrorInfo(sampler->context, CL_INVALID_VALUE,
2363                       ParamValueSizeTooSmall);
2364     }
2365     else
2366     {
2367       if (data)
2368         memcpy(param_value, data, result_size);
2369       else
2370         memcpy(param_value, &result_data, result_size);
2371     }
2372   }
2373 
2374   return CL_SUCCESS;
2375 }
2376 
clCreateProgramWithSource(cl_context context,cl_uint count,const char ** strings,const size_t * lengths,cl_int * errcode_ret)2377 CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSource(
2378   cl_context context, cl_uint count, const char** strings,
2379   const size_t* lengths, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
2380 {
2381   REGISTER_API;
2382 
2383   // Check parameters
2384   if (!context)
2385   {
2386     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
2387     return NULL;
2388   }
2389   if (count == 0)
2390   {
2391     SetErrorArg(context, CL_INVALID_VALUE, count);
2392     return NULL;
2393   }
2394   if (!strings || !strings[0])
2395   {
2396     SetErrorArg(context, CL_INVALID_VALUE, strings);
2397     return NULL;
2398   }
2399 
2400   // Concatenate sources into a single string
2401   std::string source;
2402   for (unsigned i = 0; i < count; i++)
2403   {
2404     size_t length = (lengths && lengths[i]) ? lengths[i] : strlen(strings[i]);
2405     source.append(strings[i], length);
2406   }
2407 
2408   // Create program object
2409   cl_program prog = new _cl_program;
2410   prog->dispatch = m_dispatchTable;
2411   prog->program = new oclgrind::Program(context->context, source);
2412   prog->context = context;
2413   prog->refCount = 1;
2414   if (!prog->program)
2415   {
2416     SetError(context, CL_OUT_OF_HOST_MEMORY);
2417     delete prog;
2418     return NULL;
2419   }
2420 
2421   clRetainContext(context);
2422 
2423   SetError(context, CL_SUCCESS);
2424   return prog;
2425 }
2426 
clCreateProgramWithBinary(cl_context context,cl_uint num_devices,const cl_device_id * device_list,const size_t * lengths,const unsigned char ** binaries,cl_int * binary_status,cl_int * errcode_ret)2427 CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(
2428   cl_context context, cl_uint num_devices, const cl_device_id* device_list,
2429   const size_t* lengths, const unsigned char** binaries, cl_int* binary_status,
2430   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
2431 {
2432   REGISTER_API;
2433 
2434   // Check parameters
2435   if (!context)
2436   {
2437     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
2438     return NULL;
2439   }
2440   if (num_devices != 1 || !device_list)
2441   {
2442     SetErrorInfo(context, CL_INVALID_VALUE, "Invalid device list");
2443     return NULL;
2444   }
2445   if (!lengths)
2446   {
2447     SetErrorArg(context, CL_INVALID_VALUE, lengths);
2448     return NULL;
2449   }
2450   if (!binaries)
2451   {
2452     SetErrorArg(context, CL_INVALID_VALUE, binaries);
2453     return NULL;
2454   }
2455   if (device_list[0] != m_device)
2456   {
2457     SetErrorArg(context, CL_INVALID_DEVICE, device_list);
2458     return NULL;
2459   }
2460 
2461   // Create program object
2462   cl_program prog = new _cl_program;
2463   prog->dispatch = m_dispatchTable;
2464   prog->program = oclgrind::Program::createFromBitcode(context->context,
2465                                                        binaries[0], lengths[0]);
2466   prog->context = context;
2467   prog->refCount = 1;
2468   if (!prog->program)
2469   {
2470     SetError(context, CL_INVALID_BINARY);
2471     if (binary_status)
2472     {
2473       binary_status[0] = CL_INVALID_BINARY;
2474     }
2475     delete prog;
2476     return NULL;
2477   }
2478   if (binary_status)
2479   {
2480     binary_status[0] = CL_SUCCESS;
2481   }
2482 
2483   clRetainContext(context);
2484 
2485   SetError(context, CL_SUCCESS);
2486   return prog;
2487 }
2488 
clCreateProgramWithBuiltInKernels(cl_context context,cl_uint num_devices,const cl_device_id * device_list,const char * kernel_names,cl_int * errcode_ret)2489 CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernels(
2490   cl_context context, cl_uint num_devices, const cl_device_id* device_list,
2491   const char* kernel_names, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2
2492 {
2493   REGISTER_API;
2494 
2495   if (!context)
2496   {
2497     SetError(NULL, CL_INVALID_CONTEXT);
2498     return NULL;
2499   }
2500 
2501   SetErrorInfo(context, CL_INVALID_VALUE, "No built-in kernels available");
2502   return NULL;
2503 }
2504 
clRetainProgram(cl_program program)2505 CL_API_ENTRY cl_int CL_API_CALL clRetainProgram(cl_program program)
2506   CL_API_SUFFIX__VERSION_1_0
2507 {
2508   REGISTER_API;
2509 
2510   if (!program)
2511   {
2512     ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
2513   }
2514 
2515   program->refCount++;
2516   return CL_SUCCESS;
2517 }
2518 
clReleaseProgram(cl_program program)2519 CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program program)
2520   CL_API_SUFFIX__VERSION_1_0
2521 {
2522   REGISTER_API;
2523 
2524   if (!program)
2525   {
2526     ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
2527   }
2528 
2529   if (--program->refCount == 0)
2530   {
2531     delete program->program;
2532     clReleaseContext(program->context);
2533     delete program;
2534   }
2535 
2536   return CL_SUCCESS;
2537 }
2538 
clBuildProgram(cl_program program,cl_uint num_devices,const cl_device_id * device_list,const char * options,void (CL_CALLBACK * pfn_notify)(cl_program,void *),void * user_data)2539 CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(
2540   cl_program program, cl_uint num_devices, const cl_device_id* device_list,
2541   const char* options, void(CL_CALLBACK* pfn_notify)(cl_program, void*),
2542   void* user_data) CL_API_SUFFIX__VERSION_1_0
2543 {
2544   REGISTER_API;
2545 
2546   // Check parameters
2547   if (!program || !program->program)
2548   {
2549     ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
2550   }
2551   if (num_devices > 0 && !device_list)
2552   {
2553     ReturnErrorInfo(program->context, CL_INVALID_VALUE,
2554                     "num_devices >0 but device_list is NULL");
2555   }
2556   if (num_devices == 0 && device_list)
2557   {
2558     ReturnErrorInfo(program->context, CL_INVALID_VALUE,
2559                     "num_devices == 0 but device_list non-NULL");
2560   }
2561   if (!pfn_notify && user_data)
2562   {
2563     ReturnErrorInfo(program->context, CL_INVALID_VALUE,
2564                     "pfn_notify NULL but user_data non-NULL");
2565   }
2566   if (device_list && !device_list[0])
2567   {
2568     ReturnErrorArg(program->context, CL_INVALID_DEVICE, device);
2569   }
2570 
2571   // Build program
2572   bool success = program->program->build(oclgrind::Program::BUILD, options);
2573 
2574   // Fire callback
2575   if (pfn_notify)
2576   {
2577     pfn_notify(program, user_data);
2578   }
2579 
2580   if (!success)
2581   {
2582     ReturnError(program->context, CL_BUILD_PROGRAM_FAILURE);
2583   }
2584 
2585   return CL_SUCCESS;
2586 }
2587 
clUnloadCompiler(void)2588 CL_API_ENTRY cl_int CL_API_CALL clUnloadCompiler(void)
2589   CL_API_SUFFIX__VERSION_1_0
2590 {
2591   REGISTER_API;
2592 
2593   return CL_SUCCESS;
2594 }
2595 
clCompileProgram(cl_program program,cl_uint num_devices,const cl_device_id * device_list,const char * options,cl_uint num_input_headers,const cl_program * input_headers,const char ** header_include_names,void (CL_CALLBACK * pfn_notify)(cl_program,void *),void * user_data)2596 CL_API_ENTRY cl_int CL_API_CALL clCompileProgram(
2597   cl_program program, cl_uint num_devices, const cl_device_id* device_list,
2598   const char* options, cl_uint num_input_headers,
2599   const cl_program* input_headers, const char** header_include_names,
2600   void(CL_CALLBACK* pfn_notify)(cl_program, void*),
2601   void* user_data) CL_API_SUFFIX__VERSION_1_2
2602 {
2603   REGISTER_API;
2604 
2605   // Check parameters
2606   if (!program)
2607   {
2608     ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
2609   }
2610   if (num_devices > 0 && !device_list)
2611   {
2612     ReturnErrorInfo(program->context, CL_INVALID_VALUE,
2613                     "num_devices >0 but device_list is NULL");
2614   }
2615   if (num_devices == 0 && device_list)
2616   {
2617     ReturnErrorInfo(program->context, CL_INVALID_VALUE,
2618                     "num_devices == 0 but device_list non-NULL");
2619   }
2620   if (!pfn_notify && user_data)
2621   {
2622     ReturnErrorInfo(program->context, CL_INVALID_VALUE,
2623                     "pfn_notify NULL but user_data non-NULL");
2624   }
2625   if (device_list && !device_list[0])
2626   {
2627     ReturnErrorArg(program->context, CL_INVALID_DEVICE, device);
2628   }
2629 
2630   // Prepare headers
2631   list<oclgrind::Program::Header> headers;
2632   for (unsigned i = 0; i < num_input_headers; i++)
2633   {
2634     headers.push_back(
2635       make_pair(header_include_names[i], input_headers[i]->program));
2636   }
2637 
2638   // Build program
2639   if (!program->program->build(oclgrind::Program::COMPILE, options, headers))
2640   {
2641     ReturnError(program->context, CL_BUILD_PROGRAM_FAILURE);
2642   }
2643 
2644   // Fire callback
2645   if (pfn_notify)
2646   {
2647     pfn_notify(program, user_data);
2648   }
2649 
2650   return CL_SUCCESS;
2651 }
2652 
2653 CL_API_ENTRY cl_program CL_API_CALL
clLinkProgram(cl_context context,cl_uint num_devices,const cl_device_id * device_list,const char * options,cl_uint num_input_programs,const cl_program * input_programs,void (CL_CALLBACK * pfn_notify)(cl_program,void *),void * user_data,cl_int * errcode_ret)2654 clLinkProgram(cl_context context, cl_uint num_devices,
2655               const cl_device_id* device_list, const char* options,
2656               cl_uint num_input_programs, const cl_program* input_programs,
2657               void(CL_CALLBACK* pfn_notify)(cl_program, void*), void* user_data,
2658               cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2
2659 {
2660   REGISTER_API;
2661 
2662   // Check parameters
2663   if (!context)
2664   {
2665     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
2666     return NULL;
2667   }
2668   if (num_devices > 0 && !device_list)
2669   {
2670     SetErrorInfo(context, CL_INVALID_VALUE,
2671                  "num_devices >0 but device_list is NULL");
2672     return NULL;
2673   }
2674   if (num_devices == 0 && device_list)
2675   {
2676     SetErrorInfo(context, CL_INVALID_VALUE,
2677                  "num_devices == 0 but device_list non-NULL");
2678     return NULL;
2679   }
2680   if (!pfn_notify && user_data)
2681   {
2682     SetErrorInfo(context, CL_INVALID_VALUE,
2683                  "pfn_notify NULL but user_data non-NULL");
2684     return NULL;
2685   }
2686   if (device_list && !device_list[0])
2687   {
2688     SetErrorArg(context, CL_INVALID_DEVICE, device_list);
2689     return NULL;
2690   }
2691 
2692   // Prepare programs
2693   list<const oclgrind::Program*> programs;
2694   for (unsigned i = 0; i < num_input_programs; i++)
2695   {
2696     programs.push_back(input_programs[i]->program);
2697   }
2698 
2699   // Create program object
2700   cl_program prog = new _cl_program;
2701   prog->dispatch = m_dispatchTable;
2702   prog->program =
2703     oclgrind::Program::createFromPrograms(context->context, programs, options);
2704   prog->context = context;
2705   prog->refCount = 1;
2706   if (!prog->program)
2707   {
2708     SetError(context, CL_INVALID_BINARY);
2709     delete prog;
2710     return NULL;
2711   }
2712 
2713   // Fire callback
2714   if (pfn_notify)
2715   {
2716     pfn_notify(prog, user_data);
2717   }
2718 
2719   clRetainContext(context);
2720 
2721   SetError(context, CL_SUCCESS);
2722   return prog;
2723 }
2724 
2725 CL_API_ENTRY cl_int CL_API_CALL
clUnloadPlatformCompiler(cl_platform_id platform)2726 clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2
2727 {
2728   return CL_SUCCESS;
2729 }
2730 
clGetProgramInfo(cl_program program,cl_program_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)2731 CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfo(
2732   cl_program program, cl_program_info param_name, size_t param_value_size,
2733   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
2734 {
2735   REGISTER_API;
2736 
2737   // Check program is valid
2738   if (!program)
2739   {
2740     ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
2741   }
2742   if ((param_name == CL_PROGRAM_NUM_KERNELS ||
2743        param_name == CL_PROGRAM_KERNEL_NAMES) &&
2744       program->program->getBuildStatus() != CL_BUILD_SUCCESS)
2745   {
2746     ReturnErrorInfo(program->context, CL_INVALID_PROGRAM_EXECUTABLE,
2747                     "Program not successfully built");
2748   }
2749 
2750   size_t dummy;
2751   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
2752   union
2753   {
2754     cl_uint cluint;
2755     cl_device_id device;
2756     cl_context context;
2757     size_t sizet;
2758     cl_bool clbool;
2759   } result_data;
2760   const char* str = 0;
2761   string kernelNames;
2762 
2763   switch (param_name)
2764   {
2765   case CL_PROGRAM_REFERENCE_COUNT:
2766     result_size = sizeof(cl_uint);
2767     result_data.cluint = program->refCount;
2768     break;
2769   case CL_PROGRAM_CONTEXT:
2770     result_size = sizeof(cl_context);
2771     result_data.context = program->context;
2772     break;
2773   case CL_PROGRAM_NUM_DEVICES:
2774     result_size = sizeof(cl_uint);
2775     result_data.cluint = 1;
2776     break;
2777   case CL_PROGRAM_DEVICES:
2778     result_size = sizeof(cl_device_id);
2779     result_data.device = m_device;
2780     break;
2781   case CL_PROGRAM_SOURCE:
2782     str = program->program->getSource().c_str();
2783     result_size = strlen(str) + 1;
2784     break;
2785   case CL_PROGRAM_IL:
2786     result_size = 0;
2787     break;
2788   case CL_PROGRAM_BINARY_SIZES:
2789     result_size = sizeof(size_t);
2790     result_data.sizet = program->program->getBinarySize();
2791     break;
2792   case CL_PROGRAM_BINARIES:
2793     result_size = sizeof(unsigned char*);
2794     break;
2795   case CL_PROGRAM_NUM_KERNELS:
2796     result_size = sizeof(size_t);
2797     result_data.sizet = program->program->getNumKernels();
2798     break;
2799   case CL_PROGRAM_KERNEL_NAMES:
2800   {
2801     list<string> names = program->program->getKernelNames();
2802     for (list<string>::iterator itr = names.begin(); itr != names.end(); itr++)
2803     {
2804       kernelNames += *itr;
2805       kernelNames += ";";
2806     }
2807     if (!kernelNames.empty())
2808     {
2809       kernelNames.erase(kernelNames.length() - 1);
2810     }
2811     str = kernelNames.c_str();
2812     result_size = strlen(str) + 1;
2813     break;
2814   }
2815   case CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT:
2816   case CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT:
2817     result_size = sizeof(cl_bool);
2818     result_data.clbool = CL_FALSE;
2819     break;
2820   default:
2821     ReturnErrorArg(program->context, CL_INVALID_VALUE, param_name);
2822   }
2823 
2824   if (param_value)
2825   {
2826     // Check destination is large enough
2827     if (param_value_size < result_size)
2828     {
2829       ReturnErrorInfo(NULL, CL_INVALID_VALUE, ParamValueSizeTooSmall);
2830     }
2831     else if (param_name == CL_PROGRAM_BINARIES)
2832     {
2833       program->program->getBinary(((unsigned char**)param_value)[0]);
2834     }
2835     else
2836     {
2837       if (str)
2838         memcpy(param_value, str, result_size);
2839       else
2840         memcpy(param_value, &result_data, result_size);
2841     }
2842   }
2843 
2844   return CL_SUCCESS;
2845 }
2846 
clGetProgramBuildInfo(cl_program program,cl_device_id device,cl_program_build_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)2847 CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfo(
2848   cl_program program, cl_device_id device, cl_program_build_info param_name,
2849   size_t param_value_size, void* param_value,
2850   size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
2851 {
2852   REGISTER_API;
2853 
2854   // Check program is valid
2855   if (!program)
2856   {
2857     ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
2858   }
2859 
2860   size_t dummy;
2861   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
2862   union
2863   {
2864     cl_build_status status;
2865     cl_program_binary_type type;
2866     size_t sizet;
2867   } result_data;
2868   const char* str = 0;
2869 
2870   switch (param_name)
2871   {
2872   case CL_PROGRAM_BUILD_STATUS:
2873     result_size = sizeof(cl_build_status);
2874     result_data.status = program->program->getBuildStatus();
2875     break;
2876   case CL_PROGRAM_BUILD_OPTIONS:
2877     str = program->program->getBuildOptions().c_str();
2878     result_size = strlen(str) + 1;
2879     break;
2880   case CL_PROGRAM_BUILD_LOG:
2881     str = program->program->getBuildLog().c_str();
2882     result_size = strlen(str) + 1;
2883     break;
2884   case CL_PROGRAM_BINARY_TYPE:
2885     result_size = sizeof(cl_program_binary_type);
2886     result_data.type = program->program->getBinaryType();
2887     break;
2888   case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE:
2889     result_size = sizeof(size_t);
2890     result_data.sizet = program->program->getTotalProgramScopeVarSize();
2891     break;
2892   default:
2893     ReturnErrorArg(program->context, CL_INVALID_VALUE, param_name);
2894   }
2895 
2896   if (param_value)
2897   {
2898     // Check destination is large enough
2899     if (param_value_size < result_size)
2900     {
2901       ReturnErrorInfo(program->context, CL_INVALID_VALUE,
2902                       ParamValueSizeTooSmall);
2903     }
2904     else
2905     {
2906       if (str)
2907         memcpy(param_value, str, result_size);
2908       else
2909         memcpy(param_value, &result_data, result_size);
2910     }
2911   }
2912 
2913   return CL_SUCCESS;
2914 }
2915 
2916 CL_API_ENTRY cl_kernel CL_API_CALL
clCreateKernel(cl_program program,const char * kernel_name,cl_int * errcode_ret)2917 clCreateKernel(cl_program program, const char* kernel_name,
2918                cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
2919 {
2920   REGISTER_API;
2921 
2922   // Check parameters
2923   if (program->dispatch != m_dispatchTable)
2924   {
2925     SetError(NULL, CL_INVALID_PROGRAM);
2926     return NULL;
2927   }
2928   if (!kernel_name)
2929   {
2930     SetErrorArg(program->context, CL_INVALID_VALUE, kernel_name);
2931     return NULL;
2932   }
2933 
2934   // Create kernel object
2935   cl_kernel kernel = new _cl_kernel;
2936   kernel->dispatch = m_dispatchTable;
2937   kernel->kernel = program->program->createKernel(kernel_name);
2938   kernel->program = program;
2939   kernel->refCount = 1;
2940   if (!kernel->kernel)
2941   {
2942     SetErrorInfo(program->context, CL_INVALID_KERNEL_NAME,
2943                  "Kernel '" << kernel_name << "' not found");
2944     delete kernel;
2945     return NULL;
2946   }
2947 
2948   clRetainProgram(program);
2949 
2950   SetError(program->context, CL_SUCCESS);
2951   return kernel;
2952 }
2953 
clCreateKernelsInProgram(cl_program program,cl_uint num_kernels,cl_kernel * kernels,cl_uint * num_kernels_ret)2954 CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgram(
2955   cl_program program, cl_uint num_kernels, cl_kernel* kernels,
2956   cl_uint* num_kernels_ret) CL_API_SUFFIX__VERSION_1_0
2957 {
2958   REGISTER_API;
2959 
2960   // Check parameters
2961   if (!program)
2962   {
2963     ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
2964   }
2965   if (program->program->getBuildStatus() != CL_BUILD_SUCCESS)
2966   {
2967     ReturnErrorInfo(program->context, CL_INVALID_PROGRAM_EXECUTABLE,
2968                     "Program not built");
2969   }
2970 
2971   unsigned int num = program->program->getNumKernels();
2972   if (kernels && num_kernels < num)
2973   {
2974     ReturnErrorInfo(program->context, CL_INVALID_VALUE,
2975                     "num_kernels is " << num_kernels << ", but " << num
2976                                       << " kernels found");
2977   }
2978 
2979   if (kernels)
2980   {
2981     int i = 0;
2982     list<string> names = program->program->getKernelNames();
2983     for (list<string>::iterator itr = names.begin(); itr != names.end(); itr++)
2984     {
2985       cl_kernel kernel = new _cl_kernel;
2986       kernel->dispatch = m_dispatchTable;
2987       kernel->kernel = program->program->createKernel(*itr);
2988       kernel->program = program;
2989       kernel->refCount = 1;
2990       kernels[i++] = kernel;
2991 
2992       clRetainProgram(program);
2993     }
2994   }
2995 
2996   if (num_kernels_ret)
2997   {
2998     *num_kernels_ret = num;
2999   }
3000 
3001   return CL_SUCCESS;
3002 }
3003 
clRetainKernel(cl_kernel kernel)3004 CL_API_ENTRY cl_int CL_API_CALL clRetainKernel(cl_kernel kernel)
3005   CL_API_SUFFIX__VERSION_1_0
3006 {
3007   REGISTER_API;
3008 
3009   if (!kernel)
3010   {
3011     ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel);
3012   }
3013 
3014   kernel->refCount++;
3015   return CL_SUCCESS;
3016 }
3017 
clReleaseKernel(cl_kernel kernel)3018 CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel kernel)
3019   CL_API_SUFFIX__VERSION_1_0
3020 {
3021   REGISTER_API;
3022 
3023   if (!kernel)
3024   {
3025     ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel);
3026   }
3027 
3028   if (--kernel->refCount == 0)
3029   {
3030 
3031     // Release memory allocated for image arguments
3032     for (auto* img : kernel->imageArgs)
3033     {
3034       delete img;
3035     }
3036 
3037     delete kernel->kernel;
3038 
3039     clReleaseProgram(kernel->program);
3040 
3041     delete kernel;
3042   }
3043 
3044   return CL_SUCCESS;
3045 }
3046 
3047 CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArg(cl_kernel kernel,cl_uint arg_index,size_t arg_size,const void * arg_value)3048 clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size,
3049                const void* arg_value) CL_API_SUFFIX__VERSION_1_0
3050 {
3051   REGISTER_API;
3052 
3053   // Check parameters are valid
3054   if (!kernel)
3055   {
3056     ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel);
3057   }
3058   if (arg_index >= kernel->kernel->getNumArguments())
3059   {
3060     ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_INDEX,
3061                     "arg_index is " << arg_index << ", but kernel has "
3062                                     << kernel->kernel->getNumArguments()
3063                                     << " arguments");
3064   }
3065 
3066   unsigned int addr = kernel->kernel->getArgumentAddressQualifier(arg_index);
3067   bool isSampler =
3068     kernel->kernel->getArgumentTypeName(arg_index) == "sampler_t";
3069 
3070   if (kernel->kernel->getArgumentSize(arg_index) != arg_size && !isSampler &&
3071       addr != CL_KERNEL_ARG_ADDRESS_LOCAL)
3072   {
3073     ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_SIZE,
3074                     "arg_size is " << arg_size << ", but argument should be "
3075                                    << kernel->kernel->getArgumentSize(arg_index)
3076                                    << " bytes");
3077   }
3078 
3079   // Prepare argument value
3080   oclgrind::TypedValue value;
3081   value.data = new unsigned char[arg_size];
3082   value.size = arg_size;
3083   value.num = 1;
3084   switch (addr)
3085   {
3086   case CL_KERNEL_ARG_ADDRESS_PRIVATE:
3087     if (isSampler)
3088     {
3089       memcpy(value.data, &(*(cl_sampler*)arg_value)->sampler, 4);
3090     }
3091     else
3092     {
3093       memcpy(value.data, arg_value, arg_size);
3094     }
3095     break;
3096   case CL_KERNEL_ARG_ADDRESS_LOCAL:
3097     delete[] value.data;
3098     value.data = NULL;
3099     break;
3100   case CL_KERNEL_ARG_ADDRESS_GLOBAL:
3101   case CL_KERNEL_ARG_ADDRESS_CONSTANT:
3102     if (arg_value && *(cl_mem*)arg_value)
3103     {
3104       cl_mem mem = *(cl_mem*)arg_value;
3105 
3106       if (mem->isImage)
3107       {
3108         // Create Image struct
3109         oclgrind::Image* image = new oclgrind::Image;
3110         image->address = mem->address;
3111         image->format = ((cl_image*)mem)->format;
3112         image->desc = ((cl_image*)mem)->desc;
3113         *(oclgrind::Image**)value.data = image;
3114         // Keep a record of the image struct for releasing it later
3115         kernel->imageArgs.push_back(image);
3116       }
3117       else
3118       {
3119         memcpy(value.data, &mem->address, arg_size);
3120       }
3121 
3122       kernel->memArgs[arg_index] = mem;
3123     }
3124     else
3125     {
3126       value.setPointer(0);
3127       kernel->memArgs.erase(arg_index);
3128     }
3129     break;
3130   default:
3131     ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_VALUE,
3132                     "Unsupported address space");
3133   }
3134 
3135   // Set argument
3136   kernel->kernel->setArgument(arg_index, value);
3137   delete[] value.data;
3138 
3139   return CL_SUCCESS;
3140 }
3141 
clGetKernelInfo(cl_kernel kernel,cl_kernel_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)3142 CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfo(
3143   cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size,
3144   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
3145 {
3146   REGISTER_API;
3147 
3148   // Check kernel is valid
3149   if (!kernel)
3150   {
3151     ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel);
3152   }
3153 
3154   size_t dummy;
3155   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
3156   union
3157   {
3158     cl_uint cluint;
3159     cl_context context;
3160     cl_program program;
3161   } result_data;
3162   const char* str = 0;
3163 
3164   switch (param_name)
3165   {
3166   case CL_KERNEL_FUNCTION_NAME:
3167     result_size = kernel->kernel->getName().size() + 1;
3168     str = kernel->kernel->getName().c_str();
3169     break;
3170   case CL_KERNEL_NUM_ARGS:
3171     result_size = sizeof(cl_uint);
3172     result_data.cluint = kernel->kernel->getNumArguments();
3173     break;
3174   case CL_KERNEL_REFERENCE_COUNT:
3175     result_size = sizeof(cl_uint);
3176     result_data.cluint = kernel->refCount;
3177     break;
3178   case CL_KERNEL_CONTEXT:
3179     result_size = sizeof(cl_context);
3180     result_data.context = kernel->program->context;
3181     break;
3182   case CL_KERNEL_PROGRAM:
3183     result_size = sizeof(cl_program);
3184     result_data.program = kernel->program;
3185     break;
3186   case CL_KERNEL_ATTRIBUTES:
3187     result_size = kernel->kernel->getAttributes().size() + 1;
3188     str = kernel->kernel->getAttributes().c_str();
3189     break;
3190   default:
3191     ReturnErrorArg(kernel->program->context, CL_INVALID_VALUE, param_name);
3192   }
3193 
3194   if (param_value)
3195   {
3196     // Check destination is large enough
3197     if (param_value_size < result_size)
3198     {
3199       ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE,
3200                       ParamValueSizeTooSmall);
3201     }
3202     else
3203     {
3204       if (str)
3205         memcpy(param_value, str, result_size);
3206       else
3207         memcpy(param_value, &result_data, result_size);
3208     }
3209   }
3210 
3211   return CL_SUCCESS;
3212 }
3213 
clGetKernelArgInfo(cl_kernel kernel,cl_uint arg_indx,cl_kernel_arg_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)3214 CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfo(
3215   cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name,
3216   size_t param_value_size, void* param_value,
3217   size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2
3218 {
3219   REGISTER_API;
3220 
3221   // Check parameters are valid
3222   if (!kernel)
3223   {
3224     ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel);
3225   }
3226   if (arg_indx >= kernel->kernel->getNumArguments())
3227   {
3228     ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_INDEX,
3229                     "arg_indx is " << arg_indx << ", but kernel has "
3230                                    << kernel->kernel->getNumArguments()
3231                                    << " arguments");
3232   }
3233 
3234   size_t dummy = 0;
3235   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
3236   union
3237   {
3238     cl_kernel_arg_address_qualifier addressQual;
3239     cl_kernel_arg_access_qualifier accessQual;
3240     cl_kernel_arg_type_qualifier typeQual;
3241   } result_data;
3242 
3243   std::string str_data;
3244 
3245   switch (param_name)
3246   {
3247   case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
3248     result_size = sizeof(cl_kernel_arg_address_qualifier);
3249     result_data.addressQual =
3250       kernel->kernel->getArgumentAddressQualifier(arg_indx);
3251     break;
3252   case CL_KERNEL_ARG_ACCESS_QUALIFIER:
3253     result_size = sizeof(cl_kernel_arg_access_qualifier);
3254     result_data.accessQual =
3255       kernel->kernel->getArgumentAccessQualifier(arg_indx);
3256     break;
3257   case CL_KERNEL_ARG_TYPE_NAME:
3258     str_data = kernel->kernel->getArgumentTypeName(arg_indx).str();
3259     result_size = str_data.size() + 1;
3260     break;
3261   case CL_KERNEL_ARG_TYPE_QUALIFIER:
3262     result_size = sizeof(cl_kernel_arg_type_qualifier);
3263     result_data.typeQual = kernel->kernel->getArgumentTypeQualifier(arg_indx);
3264     break;
3265   case CL_KERNEL_ARG_NAME:
3266     str_data = kernel->kernel->getArgumentName(arg_indx).str();
3267     result_size = str_data.size() + 1;
3268     break;
3269   default:
3270     ReturnErrorArg(kernel->program->context, CL_INVALID_VALUE, param_name);
3271   }
3272 
3273   if (param_value)
3274   {
3275     // Check destination is large enough
3276     if (param_value_size < result_size)
3277     {
3278       ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE,
3279                       ParamValueSizeTooSmall);
3280     }
3281 
3282     if (str_data.size())
3283       memcpy(param_value, str_data.c_str(), result_size);
3284     else
3285       memcpy(param_value, &result_data, result_size);
3286   }
3287 
3288   return CL_SUCCESS;
3289 }
3290 
clGetKernelWorkGroupInfo(cl_kernel kernel,cl_device_id device,cl_kernel_work_group_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)3291 CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfo(
3292   cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name,
3293   size_t param_value_size, void* param_value,
3294   size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
3295 {
3296   REGISTER_API;
3297 
3298   // Check parameters are valid
3299   if (!kernel)
3300   {
3301     ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel);
3302   }
3303   if (!device || device != m_device)
3304   {
3305     ReturnErrorArg(kernel->program->context, CL_INVALID_DEVICE, device);
3306   }
3307 
3308   size_t dummy;
3309   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
3310   union
3311   {
3312     size_t sizet;
3313     size_t sizet3[3];
3314     cl_ulong clulong;
3315   } result_data;
3316 
3317   switch (param_name)
3318   {
3319   case CL_KERNEL_GLOBAL_WORK_SIZE:
3320     ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE,
3321                     "CL_KERNEL_GLOBAL_SIZE only valid on custom devices");
3322   case CL_KERNEL_WORK_GROUP_SIZE:
3323     result_size = sizeof(size_t);
3324     result_data.sizet = m_device->maxWGSize;
3325     break;
3326   case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
3327     result_size = sizeof(size_t[3]);
3328     kernel->kernel->getRequiredWorkGroupSize(result_data.sizet3);
3329     break;
3330   case CL_KERNEL_LOCAL_MEM_SIZE:
3331     result_size = sizeof(cl_ulong);
3332     result_data.clulong = kernel->kernel->getLocalMemorySize();
3333     break;
3334   case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
3335     result_size = sizeof(size_t);
3336     result_data.sizet = 1;
3337     break;
3338   case CL_KERNEL_PRIVATE_MEM_SIZE:
3339     result_size = sizeof(cl_ulong);
3340     result_data.clulong = 0;
3341     break;
3342   default:
3343     ReturnErrorArg(kernel->program->context, CL_INVALID_VALUE, param_name);
3344   }
3345 
3346   if (param_value)
3347   {
3348     // Check destination is large enough
3349     if (param_value_size < result_size)
3350     {
3351       ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE,
3352                       ParamValueSizeTooSmall);
3353     }
3354     else
3355     {
3356       memcpy(param_value, &result_data, result_size);
3357     }
3358   }
3359 
3360   return CL_SUCCESS;
3361 }
3362 
3363 /* Event Object APIs  */
3364 
3365 namespace
3366 {
3367 // Utility to check if an event has completed (or terminated)
isComplete(cl_event event)3368 inline bool isComplete(cl_event event)
3369 {
3370   return (event->event->state == CL_COMPLETE || event->event->state < 0);
3371 }
3372 } // namespace
3373 
clWaitForEvents(cl_uint num_events,const cl_event * event_list)3374 CL_API_ENTRY cl_int CL_API_CALL clWaitForEvents(
3375   cl_uint num_events, const cl_event* event_list) CL_API_SUFFIX__VERSION_1_0
3376 {
3377   REGISTER_API;
3378 
3379   // Check parameters
3380   if (!num_events)
3381   {
3382     ReturnErrorInfo(NULL, CL_INVALID_VALUE, "num_events cannot be 0");
3383   }
3384   if (!event_list)
3385   {
3386     ReturnErrorInfo(NULL, CL_INVALID_VALUE, "event_list cannot be NULL");
3387   }
3388 
3389   // Loop until all events complete
3390   bool complete = false;
3391   while (!complete)
3392   {
3393     complete = true;
3394     for (unsigned i = 0; i < num_events; i++)
3395     {
3396       // Skip event if already complete
3397       if (isComplete(event_list[i]))
3398       {
3399         continue;
3400       }
3401 
3402       // If it's not a user event, execute the associated command
3403       if (event_list[i]->queue)
3404       {
3405         oclgrind::Command* cmd = event_list[i]->event->command;
3406         event_list[i]->event->queue->execute(cmd, false);
3407         releaseCommand(cmd);
3408 
3409         // If it's still not complete, update flag
3410         if (!isComplete(event_list[i]))
3411         {
3412           complete = false;
3413         }
3414       }
3415       else
3416       {
3417         complete = false;
3418       }
3419     }
3420   }
3421 
3422   // Check if any command terminated unsuccessfully
3423   for (unsigned i = 0; i < num_events; i++)
3424   {
3425     if (event_list[i]->event->state < 0)
3426     {
3427       ReturnErrorInfo(event_list[i]->context,
3428                       CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST,
3429                       "Event " << i << " terminated with error "
3430                                << event_list[i]->event->state);
3431     }
3432   }
3433 
3434   return CL_SUCCESS;
3435 }
3436 
clGetEventInfo(cl_event event,cl_event_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)3437 CL_API_ENTRY cl_int CL_API_CALL clGetEventInfo(
3438   cl_event event, cl_event_info param_name, size_t param_value_size,
3439   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
3440 {
3441   REGISTER_API;
3442 
3443   // Check event is valid
3444   if (!event)
3445   {
3446     ReturnErrorArg(NULL, CL_INVALID_EVENT, event);
3447   }
3448 
3449   size_t dummy;
3450   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
3451   union
3452   {
3453     cl_command_queue queue;
3454     cl_context context;
3455     cl_command_type type;
3456     cl_int clint;
3457     cl_uint cluint;
3458     size_t sizet;
3459     size_t sizet3[3];
3460   } result_data;
3461 
3462   switch (param_name)
3463   {
3464   case CL_EVENT_COMMAND_QUEUE:
3465     result_size = sizeof(cl_command_queue);
3466     result_data.queue = event->queue;
3467     break;
3468   case CL_EVENT_CONTEXT:
3469     result_size = sizeof(cl_context);
3470     result_data.context = event->context;
3471     break;
3472   case CL_EVENT_COMMAND_TYPE:
3473     result_size = sizeof(cl_command_type);
3474     result_data.type = event->type;
3475     break;
3476   case CL_EVENT_COMMAND_EXECUTION_STATUS:
3477     result_size = sizeof(cl_int);
3478     result_data.clint = event->event->state;
3479     break;
3480   case CL_EVENT_REFERENCE_COUNT:
3481     result_size = sizeof(cl_uint);
3482     result_data.cluint = event->refCount;
3483     break;
3484   default:
3485     ReturnErrorArg(event->context, CL_INVALID_VALUE, param_name);
3486   }
3487 
3488   if (param_value)
3489   {
3490     // Check destination is large enough
3491     if (param_value_size < result_size)
3492     {
3493       ReturnErrorInfo(event->context, CL_INVALID_VALUE, ParamValueSizeTooSmall);
3494     }
3495     else
3496     {
3497       memcpy(param_value, &result_data, result_size);
3498     }
3499   }
3500 
3501   return CL_SUCCESS;
3502 }
3503 
clCreateUserEvent(cl_context context,cl_int * errcode_ret)3504 CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent(
3505   cl_context context, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1
3506 {
3507   REGISTER_API;
3508 
3509   // Check parameters
3510   if (!context)
3511   {
3512     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
3513     return NULL;
3514   }
3515 
3516   /// Create event object
3517   cl_event event = new _cl_event;
3518   event->dispatch = m_dispatchTable;
3519   event->context = context;
3520   event->queue = 0;
3521   event->type = CL_COMMAND_USER;
3522   event->event = new oclgrind::Event();
3523   event->event->state = CL_SUBMITTED;
3524   event->event->command = NULL;
3525   event->event->queue = NULL;
3526   event->refCount = 1;
3527 
3528   SetError(context, CL_SUCCESS);
3529   return event;
3530 }
3531 
clRetainEvent(cl_event event)3532 CL_API_ENTRY cl_int CL_API_CALL clRetainEvent(cl_event event)
3533   CL_API_SUFFIX__VERSION_1_0
3534 {
3535   REGISTER_API;
3536 
3537   if (!event)
3538   {
3539     ReturnErrorArg(NULL, CL_INVALID_EVENT, event);
3540   }
3541 
3542   event->refCount++;
3543 
3544   return CL_SUCCESS;
3545 }
3546 
clReleaseEvent(cl_event event)3547 CL_API_ENTRY cl_int CL_API_CALL clReleaseEvent(cl_event event)
3548   CL_API_SUFFIX__VERSION_1_0
3549 {
3550   REGISTER_API;
3551 
3552   if (!event)
3553   {
3554     ReturnErrorArg(NULL, CL_INVALID_EVENT, event);
3555   }
3556 
3557   if (--event->refCount == 0)
3558   {
3559     if (event->event)
3560     {
3561       delete event->event;
3562     }
3563     delete event;
3564   }
3565 
3566   return CL_SUCCESS;
3567 }
3568 
clSetUserEventStatus(cl_event event,cl_int execution_status)3569 CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatus(
3570   cl_event event, cl_int execution_status) CL_API_SUFFIX__VERSION_1_1
3571 {
3572   REGISTER_API;
3573 
3574   // Check parameters
3575   if (!event)
3576   {
3577     ReturnErrorArg(NULL, CL_INVALID_EVENT, event);
3578   }
3579   if (event->queue)
3580   {
3581     ReturnErrorInfo(event->context, CL_INVALID_EVENT, "Not a user event");
3582   }
3583   if (execution_status != CL_COMPLETE && execution_status >= 0)
3584   {
3585     ReturnErrorArg(event->context, CL_INVALID_VALUE, execution_status);
3586   }
3587   if (event->event->state == CL_COMPLETE || event->event->state < 0)
3588   {
3589     ReturnErrorInfo(event->context, CL_INVALID_OPERATION,
3590                     "Event status already set");
3591   }
3592 
3593   event->event->state = execution_status;
3594 
3595   // Perform callbacks
3596   list<pair<void(CL_CALLBACK*)(cl_event, cl_int, void*), void*>>::iterator itr;
3597   for (itr = event->callbacks.begin(); itr != event->callbacks.end(); itr++)
3598   {
3599     itr->first(event, execution_status, itr->second);
3600   }
3601 
3602   return CL_SUCCESS;
3603 }
3604 
3605 CL_API_ENTRY cl_int CL_API_CALL
clSetEventCallback(cl_event event,cl_int command_exec_callback_type,void (CL_CALLBACK * pfn_notify)(cl_event,cl_int,void *),void * user_data)3606 clSetEventCallback(cl_event event, cl_int command_exec_callback_type,
3607                    void(CL_CALLBACK* pfn_notify)(cl_event, cl_int, void*),
3608                    void* user_data) CL_API_SUFFIX__VERSION_1_1
3609 {
3610   REGISTER_API;
3611 
3612   // Check parameters
3613   if (!event)
3614   {
3615     ReturnErrorArg(NULL, CL_INVALID_EVENT, event);
3616   }
3617   if (!pfn_notify)
3618   {
3619     ReturnErrorArg(event->context, CL_INVALID_VALUE, pfn_notify);
3620   }
3621   if (command_exec_callback_type != CL_COMPLETE &&
3622       command_exec_callback_type != CL_SUBMITTED &&
3623       command_exec_callback_type != CL_RUNNING)
3624   {
3625     ReturnErrorArg(event->context, CL_INVALID_VALUE,
3626                    command_exec_callback_type);
3627   }
3628 
3629   event->callbacks.push_back(make_pair(pfn_notify, user_data));
3630 
3631   return CL_SUCCESS;
3632 }
3633 
clGetEventProfilingInfo(cl_event event,cl_profiling_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)3634 CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfo(
3635   cl_event event, cl_profiling_info param_name, size_t param_value_size,
3636   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
3637 {
3638   REGISTER_API;
3639 
3640   // Check event is valid
3641   if (!event)
3642   {
3643     ReturnErrorArg(NULL, CL_INVALID_EVENT, event);
3644   }
3645   if (!event->queue)
3646   {
3647     ReturnError(event->context, CL_PROFILING_INFO_NOT_AVAILABLE);
3648   }
3649 
3650   size_t dummy = 0;
3651   size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
3652   cl_ulong result;
3653 
3654   switch (param_name)
3655   {
3656   case CL_PROFILING_COMMAND_QUEUED:
3657     result_size = sizeof(cl_ulong);
3658     result = event->event->queueTime;
3659     break;
3660   case CL_PROFILING_COMMAND_SUBMIT:
3661     result_size = sizeof(cl_ulong);
3662     result = event->event->startTime;
3663     break;
3664   case CL_PROFILING_COMMAND_START:
3665     result_size = sizeof(cl_ulong);
3666     result = event->event->startTime;
3667     break;
3668   case CL_PROFILING_COMMAND_END:
3669     result_size = sizeof(cl_ulong);
3670     result = event->event->endTime;
3671     break;
3672   default:
3673     ReturnErrorArg(event->context, CL_INVALID_VALUE, param_name);
3674   }
3675 
3676   if (param_value)
3677   {
3678     // Check destination is large enough
3679     if (param_value_size < result_size)
3680     {
3681       ReturnErrorInfo(event->context, CL_INVALID_VALUE, ParamValueSizeTooSmall);
3682     }
3683     else
3684     {
3685       *(cl_ulong*)param_value = result;
3686     }
3687   }
3688 
3689   return CL_SUCCESS;
3690 }
3691 
clFlush(cl_command_queue command_queue)3692 CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue)
3693   CL_API_SUFFIX__VERSION_1_0
3694 {
3695   REGISTER_API;
3696 
3697   // Check parameters
3698   if (!command_queue)
3699   {
3700     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
3701   }
3702 
3703   // TODO: Implement properly?
3704   clFinish(command_queue);
3705 
3706   return CL_SUCCESS;
3707 }
3708 
clFinish(cl_command_queue command_queue)3709 CL_API_ENTRY cl_int CL_API_CALL clFinish(cl_command_queue command_queue)
3710   CL_API_SUFFIX__VERSION_1_0
3711 {
3712   REGISTER_API;
3713 
3714   // Check parameters
3715   if (!command_queue)
3716   {
3717     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
3718   }
3719 
3720   // TODO: Move this finish to async thread?
3721   oclgrind::Command* cmd = command_queue->queue->finish();
3722   releaseCommand(cmd);
3723 
3724   return CL_SUCCESS;
3725 }
3726 
clEnqueueReadBuffer(cl_command_queue command_queue,cl_mem buffer,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)3727 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(
3728   cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read,
3729   size_t offset, size_t cb, void* ptr, cl_uint num_events_in_wait_list,
3730   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
3731 {
3732   REGISTER_API;
3733 
3734   // Check parameters
3735   if (!command_queue)
3736   {
3737     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
3738   }
3739   if (!buffer)
3740   {
3741     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj);
3742   }
3743   if (!ptr)
3744   {
3745     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr);
3746   }
3747   if (offset + cb > buffer->size)
3748   {
3749     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
3750                     "offset + cb (" << offset << " + " << cb
3751                                     << ") exceeds buffer size (" << buffer->size
3752                                     << " bytes)");
3753   }
3754   if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY))
3755   {
3756     ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
3757                     "Buffer flags specify host will not read data");
3758   }
3759 
3760   // Enqueue command
3761   oclgrind::BufferCommand* cmd =
3762     new oclgrind::BufferCommand(oclgrind::Command::READ);
3763   cmd->ptr = (unsigned char*)ptr;
3764   cmd->address = buffer->address + offset;
3765   cmd->size = cb;
3766   asyncQueueRetain(cmd, buffer);
3767   asyncEnqueue(command_queue, CL_COMMAND_READ_BUFFER, cmd,
3768                num_events_in_wait_list, event_wait_list, event);
3769 
3770   if (blocking_read)
3771   {
3772     return clFinish(command_queue);
3773   }
3774 
3775   return CL_SUCCESS;
3776 }
3777 
clEnqueueReadBufferRect(cl_command_queue command_queue,cl_mem buffer,cl_bool blocking_read,const size_t * buffer_origin,const size_t * host_origin,const size_t * region,size_t buffer_row_pitch,size_t buffer_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,void * ptr,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)3778 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRect(
3779   cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read,
3780   const size_t* buffer_origin, const size_t* host_origin, const size_t* region,
3781   size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch,
3782   size_t host_slice_pitch, void* ptr, cl_uint num_events_in_wait_list,
3783   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_1
3784 {
3785   REGISTER_API;
3786 
3787   // Check parameters
3788   if (!command_queue)
3789   {
3790     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
3791   }
3792   if (!buffer)
3793   {
3794     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj);
3795   }
3796   if (!ptr)
3797   {
3798     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr);
3799   }
3800   if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY))
3801   {
3802     ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
3803                     "Buffer flags specify host will not read data");
3804   }
3805 
3806   // Compute pitches if neccessary
3807   if (buffer_row_pitch == 0)
3808   {
3809     buffer_row_pitch = region[0];
3810   }
3811   if (buffer_slice_pitch == 0)
3812   {
3813     buffer_slice_pitch = region[1] * buffer_row_pitch;
3814   }
3815   if (host_row_pitch == 0)
3816   {
3817     host_row_pitch = region[0];
3818   }
3819   if (host_slice_pitch == 0)
3820   {
3821     host_slice_pitch = region[1] * host_row_pitch;
3822   }
3823 
3824   // Compute origin offsets
3825   size_t buffer_offset = buffer_origin[2] * buffer_slice_pitch +
3826                          buffer_origin[1] * buffer_row_pitch + buffer_origin[0];
3827   size_t host_offset = host_origin[2] * host_slice_pitch +
3828                        host_origin[1] * host_row_pitch + host_origin[0];
3829 
3830   // Ensure buffer region valid
3831   size_t end = buffer_offset + region[0] + (region[1] - 1) * buffer_row_pitch +
3832                (region[2] - 1) * buffer_slice_pitch;
3833   if (end > buffer->size)
3834   {
3835     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
3836                     "Region exceeds buffer size (" << buffer->size
3837                                                    << " bytes)");
3838   }
3839 
3840   // Enqueue command
3841   oclgrind::BufferRectCommand* cmd =
3842     new oclgrind::BufferRectCommand(oclgrind::Command::READ_RECT);
3843   cmd->ptr = (unsigned char*)ptr;
3844   cmd->address = buffer->address;
3845   cmd->buffer_offset[0] = buffer_offset;
3846   cmd->buffer_offset[1] = buffer_row_pitch;
3847   cmd->buffer_offset[2] = buffer_slice_pitch;
3848   cmd->host_offset[0] = host_offset;
3849   cmd->host_offset[1] = host_row_pitch;
3850   cmd->host_offset[2] = host_slice_pitch;
3851   memcpy(cmd->region, region, 3 * sizeof(size_t));
3852   asyncQueueRetain(cmd, buffer);
3853   asyncEnqueue(command_queue, CL_COMMAND_READ_BUFFER_RECT, cmd,
3854                num_events_in_wait_list, event_wait_list, event);
3855 
3856   if (blocking_read)
3857   {
3858     return clFinish(command_queue);
3859   }
3860 
3861   return CL_SUCCESS;
3862 }
3863 
clEnqueueWriteBuffer(cl_command_queue command_queue,cl_mem buffer,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)3864 CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(
3865   cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write,
3866   size_t offset, size_t cb, const void* ptr, cl_uint num_events_in_wait_list,
3867   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
3868 {
3869   REGISTER_API;
3870 
3871   // Check parameters
3872   if (!command_queue)
3873   {
3874     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
3875   }
3876   if (!buffer)
3877   {
3878     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj);
3879   }
3880   if (!ptr)
3881   {
3882     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr);
3883   }
3884   if (offset + cb > buffer->size)
3885   {
3886     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
3887                     "offset + cb (" << offset << " + " << cb
3888                                     << ") exceeds buffer size (" << buffer->size
3889                                     << " bytes)");
3890   }
3891   if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY))
3892   {
3893     ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
3894                     "Buffer flags specify host will not write data");
3895   }
3896 
3897   // Enqueue command
3898   oclgrind::BufferCommand* cmd =
3899     new oclgrind::BufferCommand(oclgrind::Command::WRITE);
3900   cmd->ptr = (unsigned char*)ptr;
3901   cmd->address = buffer->address + offset;
3902   cmd->size = cb;
3903   asyncQueueRetain(cmd, buffer);
3904   asyncEnqueue(command_queue, CL_COMMAND_WRITE_BUFFER, cmd,
3905                num_events_in_wait_list, event_wait_list, event);
3906 
3907   if (blocking_write)
3908   {
3909     return clFinish(command_queue);
3910   }
3911 
3912   return CL_SUCCESS;
3913 }
3914 
clEnqueueWriteBufferRect(cl_command_queue command_queue,cl_mem buffer,cl_bool blocking_write,const size_t * buffer_origin,const size_t * host_origin,const size_t * region,size_t buffer_row_pitch,size_t buffer_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,const void * ptr,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)3915 CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRect(
3916   cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write,
3917   const size_t* buffer_origin, const size_t* host_origin, const size_t* region,
3918   size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch,
3919   size_t host_slice_pitch, const void* ptr, cl_uint num_events_in_wait_list,
3920   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_1
3921 {
3922   REGISTER_API;
3923 
3924   // Check parameters
3925   if (!command_queue)
3926   {
3927     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
3928   }
3929   if (!buffer)
3930   {
3931     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj);
3932   }
3933   if (!ptr)
3934   {
3935     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr);
3936   }
3937   if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY))
3938   {
3939     ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
3940                     "Buffer flags specify host will not write data");
3941   }
3942 
3943   // Compute pitches if necessary
3944   if (buffer_row_pitch == 0)
3945   {
3946     buffer_row_pitch = region[0];
3947   }
3948   if (buffer_slice_pitch == 0)
3949   {
3950     buffer_slice_pitch = region[1] * buffer_row_pitch;
3951   }
3952   if (host_row_pitch == 0)
3953   {
3954     host_row_pitch = region[0];
3955   }
3956   if (host_slice_pitch == 0)
3957   {
3958     host_slice_pitch = region[1] * host_row_pitch;
3959   }
3960 
3961   // Compute origin offsets
3962   size_t buffer_offset = buffer_origin[2] * buffer_slice_pitch +
3963                          buffer_origin[1] * buffer_row_pitch + buffer_origin[0];
3964   size_t host_offset = host_origin[2] * host_slice_pitch +
3965                        host_origin[1] * host_row_pitch + host_origin[0];
3966 
3967   // Ensure buffer region valid
3968   size_t end = buffer_offset + region[0] + (region[1] - 1) * buffer_row_pitch +
3969                (region[2] - 1) * buffer_slice_pitch;
3970   if (end > buffer->size)
3971   {
3972     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
3973                     "Region exceeds buffer size (" << buffer->size
3974                                                    << " bytes)");
3975   }
3976 
3977   // Enqueue command
3978   oclgrind::BufferRectCommand* cmd =
3979     new oclgrind::BufferRectCommand(oclgrind::Command::WRITE_RECT);
3980   cmd->ptr = (unsigned char*)ptr;
3981   cmd->address = buffer->address;
3982   cmd->buffer_offset[0] = buffer_offset;
3983   cmd->buffer_offset[1] = buffer_row_pitch;
3984   cmd->buffer_offset[2] = buffer_slice_pitch;
3985   cmd->host_offset[0] = host_offset;
3986   cmd->host_offset[1] = host_row_pitch;
3987   cmd->host_offset[2] = host_slice_pitch;
3988   memcpy(cmd->region, region, 3 * sizeof(size_t));
3989   asyncQueueRetain(cmd, buffer);
3990   asyncEnqueue(command_queue, CL_COMMAND_WRITE_BUFFER_RECT, cmd,
3991                num_events_in_wait_list, event_wait_list, event);
3992 
3993   if (blocking_write)
3994   {
3995     return clFinish(command_queue);
3996   }
3997 
3998   return CL_SUCCESS;
3999 }
4000 
clEnqueueCopyBuffer(cl_command_queue command_queue,cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4001 CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBuffer(
4002   cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
4003   size_t src_offset, size_t dst_offset, size_t cb,
4004   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
4005   cl_event* event) CL_API_SUFFIX__VERSION_1_0
4006 {
4007   REGISTER_API;
4008 
4009   // Check parameters
4010   if (!command_queue)
4011   {
4012     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4013   }
4014   if (!src_buffer)
4015   {
4016     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_buffer);
4017   }
4018   if (!dst_buffer)
4019   {
4020     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_buffer);
4021   }
4022   if (dst_offset + cb > dst_buffer->size)
4023   {
4024     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4025                     "dst_offset + cb (" << dst_offset << " + " << cb
4026                                         << ") exceeds buffer size ("
4027                                         << dst_buffer->size << " bytes)");
4028   }
4029   if (src_offset + cb > src_buffer->size)
4030   {
4031     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4032                     "src_offset + cb (" << src_offset << " + " << cb
4033                                         << ") exceeds buffer size ("
4034                                         << src_buffer->size << " bytes)");
4035   }
4036   // If src and dst buffers are the same and if src_offset comes before
4037   // dst_offset and src buffer size goes beyond dst_offset then there is an
4038   // overlap
4039   if ((src_buffer == dst_buffer) && (src_offset <= dst_offset) &&
4040       ((src_offset + cb) > dst_offset))
4041   {
4042     ReturnErrorInfo(command_queue->context, CL_MEM_COPY_OVERLAP,
4043                     "src_buffer == dst_buffer and "
4044                     "src_offset + cb ("
4045                       << src_offset << " + " << cb << ") overlaps dst_offset ("
4046                       << dst_offset << ")");
4047   }
4048   // If src and dst buffers are the same and if dst_offset comes before
4049   // src_offset and dst buffer size goes beyond src_offset then there is an
4050   // overlap
4051   if ((src_buffer == dst_buffer) && (dst_offset <= src_offset) &&
4052       ((dst_offset + cb) > src_offset))
4053   {
4054     ReturnErrorInfo(command_queue->context, CL_MEM_COPY_OVERLAP,
4055                     "src_buffer == dst_buffer and "
4056                     "dst_offset + cb ("
4057                       << dst_offset << " + " << cb << ") overlaps src_offset ("
4058                       << src_offset << ")");
4059   }
4060 
4061   // Enqueue command
4062   oclgrind::CopyCommand* cmd = new oclgrind::CopyCommand();
4063   cmd->dst = dst_buffer->address + dst_offset;
4064   cmd->src = src_buffer->address + src_offset;
4065   cmd->size = cb;
4066   asyncQueueRetain(cmd, src_buffer);
4067   asyncQueueRetain(cmd, dst_buffer);
4068   asyncEnqueue(command_queue, CL_COMMAND_COPY_BUFFER, cmd,
4069                num_events_in_wait_list, event_wait_list, event);
4070 
4071   return CL_SUCCESS;
4072 }
4073 
clEnqueueCopyBufferRect(cl_command_queue command_queue,cl_mem src_buffer,cl_mem dst_buffer,const size_t * src_origin,const size_t * dst_origin,const size_t * region,size_t src_row_pitch,size_t src_slice_pitch,size_t dst_row_pitch,size_t dst_slice_pitch,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4074 CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRect(
4075   cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
4076   const size_t* src_origin, const size_t* dst_origin, const size_t* region,
4077   size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch,
4078   size_t dst_slice_pitch, cl_uint num_events_in_wait_list,
4079   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_1
4080 {
4081   REGISTER_API;
4082 
4083   // Check parameters
4084   if (!command_queue)
4085   {
4086     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4087   }
4088   if (!src_buffer)
4089   {
4090     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_buffer);
4091   }
4092   if (!dst_buffer)
4093   {
4094     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_buffer);
4095   }
4096   if (!region || region[0] == 0 || region[1] == 0 || region[2] == 0)
4097   {
4098     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, region);
4099   }
4100 
4101   // Compute pitches if necessary
4102   if (src_row_pitch == 0)
4103   {
4104     src_row_pitch = region[0];
4105   }
4106   if (src_slice_pitch == 0)
4107   {
4108     src_slice_pitch = region[1] * src_row_pitch;
4109   }
4110   if (dst_row_pitch == 0)
4111   {
4112     dst_row_pitch = region[0];
4113   }
4114   if (dst_slice_pitch == 0)
4115   {
4116     dst_slice_pitch = region[1] * dst_row_pitch;
4117   }
4118 
4119   // Compute origin offsets
4120   size_t src_offset = src_origin[2] * src_slice_pitch +
4121                       src_origin[1] * src_row_pitch + src_origin[0];
4122   size_t dst_offset = dst_origin[2] * dst_slice_pitch +
4123                       dst_origin[1] * dst_row_pitch + dst_origin[0];
4124 
4125   // Ensure buffer region valid
4126   size_t src_end = src_offset + region[0] + (region[1] - 1) * src_row_pitch +
4127                    (region[2] - 1) * src_slice_pitch;
4128   size_t dst_end = dst_offset + region[0] + (region[1] - 1) * dst_row_pitch +
4129                    (region[2] - 1) * dst_slice_pitch;
4130   if (src_end > src_buffer->size)
4131   {
4132     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4133                     "Region exceeds source buffer size (" << src_buffer->size
4134                                                           << " bytes)");
4135   }
4136   if (dst_end > dst_buffer->size)
4137   {
4138     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4139                     "Region exceeds destination buffer size ("
4140                       << dst_buffer->size << " bytes)");
4141   }
4142 
4143   // Enqueue command
4144   oclgrind::CopyRectCommand* cmd = new oclgrind::CopyRectCommand();
4145   cmd->src = src_buffer->address;
4146   cmd->dst = dst_buffer->address;
4147   cmd->src_offset[0] = src_offset;
4148   cmd->src_offset[1] = src_row_pitch;
4149   cmd->src_offset[2] = src_slice_pitch;
4150   cmd->dst_offset[0] = dst_offset;
4151   cmd->dst_offset[1] = dst_row_pitch;
4152   cmd->dst_offset[2] = dst_slice_pitch;
4153   memcpy(cmd->region, region, 3 * sizeof(size_t));
4154   asyncQueueRetain(cmd, src_buffer);
4155   asyncQueueRetain(cmd, dst_buffer);
4156   asyncEnqueue(command_queue, CL_COMMAND_COPY_BUFFER_RECT, cmd,
4157                num_events_in_wait_list, event_wait_list, event);
4158 
4159   return CL_SUCCESS;
4160 }
4161 
clEnqueueFillBuffer(cl_command_queue command_queue,cl_mem buffer,const void * pattern,size_t pattern_size,size_t offset,size_t cb,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4162 CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillBuffer(
4163   cl_command_queue command_queue, cl_mem buffer, const void* pattern,
4164   size_t pattern_size, size_t offset, size_t cb,
4165   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
4166   cl_event* event) CL_API_SUFFIX__VERSION_1_2
4167 {
4168   REGISTER_API;
4169 
4170   // Check parameters
4171   if (!command_queue)
4172   {
4173     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4174   }
4175   if (!buffer)
4176   {
4177     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer);
4178   }
4179   if (offset + cb > buffer->size)
4180   {
4181     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4182                     "offset + cb (" << offset << " + " << cb
4183                                     << ") exceeds buffer size (" << buffer->size
4184                                     << " bytes)");
4185   }
4186   if (!pattern)
4187   {
4188     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, pattern);
4189   }
4190   if (pattern_size == 0)
4191   {
4192     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, pattern_size);
4193   }
4194   if (offset % pattern_size)
4195   {
4196     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4197                     "offset (" << offset << ")"
4198                                << " not a multiple of pattern_size ("
4199                                << pattern_size << ")");
4200   }
4201   if (cb % pattern_size)
4202   {
4203     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4204                     "cb (" << cb << ")"
4205                            << " not a multiple of pattern_size ("
4206                            << pattern_size << ")");
4207   }
4208 
4209   // Enqueue command
4210   oclgrind::FillBufferCommand* cmd = new oclgrind::FillBufferCommand(
4211     (const unsigned char*)pattern, pattern_size);
4212   cmd->address = buffer->address + offset;
4213   cmd->size = cb;
4214   asyncQueueRetain(cmd, buffer);
4215   asyncEnqueue(command_queue, CL_COMMAND_FILL_BUFFER, cmd,
4216                num_events_in_wait_list, event_wait_list, event);
4217 
4218   return CL_SUCCESS;
4219 }
4220 
clEnqueueFillImage(cl_command_queue command_queue,cl_mem image,const void * fill_color,const size_t * origin,const size_t * region,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4221 CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImage(
4222   cl_command_queue command_queue, cl_mem image, const void* fill_color,
4223   const size_t* origin, const size_t* region, cl_uint num_events_in_wait_list,
4224   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2
4225 {
4226   REGISTER_API;
4227 
4228   // Check parameters
4229   if (!command_queue)
4230   {
4231     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4232   }
4233   if (!image)
4234   {
4235     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image);
4236   }
4237   if (!fill_color)
4238   {
4239     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, fill_color);
4240   }
4241   if (!region[0] || !region[1] || !region[2])
4242   {
4243     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4244                     "Values in region cannot be 0");
4245   }
4246 
4247   // Get image dimensions
4248   cl_image* img = (cl_image*)image;
4249   size_t width = img->desc.image_width;
4250   size_t height = img->desc.image_height;
4251   size_t depth = img->desc.image_depth;
4252   size_t arraySize = img->desc.image_array_size;
4253   size_t pixelSize = getPixelSize(&img->format);
4254   size_t row_pitch = width * pixelSize;
4255   size_t slice_pitch = height * row_pitch;
4256 
4257   if (img->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
4258     height = arraySize;
4259   if (img->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
4260     depth = arraySize;
4261 
4262   // Ensure region is within image bounds
4263   if (origin[0] + region[0] > width)
4264   {
4265     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4266                     "origin[0] + region[0] > width ("
4267                       << origin[0] << " + " << region[0] << " > " << width
4268                       << " )");
4269   }
4270   if (origin[1] + region[1] > height)
4271   {
4272     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4273                     "origin[1] + region[1] > height ("
4274                       << origin[1] << " + " << region[1] << " > " << height
4275                       << " )");
4276   }
4277   if (origin[2] + region[2] > depth)
4278   {
4279     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
4280                     "origin[2] + region[2] > depth ("
4281                       << origin[2] << " + " << region[2] << " > " << depth
4282                       << " )");
4283   }
4284 
4285   // Generate color data with correct order and data type
4286   unsigned char* color = new unsigned char[pixelSize];
4287   for (unsigned output = 0; output < getNumChannels(&img->format); output++)
4288   {
4289     // Get input channel index
4290     int input = output;
4291     switch (img->format.image_channel_order)
4292     {
4293     case CL_R:
4294     case CL_Rx:
4295     case CL_RG:
4296     case CL_RGx:
4297     case CL_RGB:
4298     case CL_RGBx:
4299     case CL_RGBA:
4300       break;
4301     case CL_BGRA:
4302       if (output == 0)
4303         input = 2;
4304       if (output == 2)
4305         input = 0;
4306       break;
4307     case CL_ARGB:
4308       if (output == 0)
4309         input = 3;
4310       if (output == 1)
4311         input = 0;
4312       if (output == 2)
4313         input = 1;
4314       if (output == 3)
4315         input = 2;
4316       break;
4317     case CL_A:
4318       if (output == 0)
4319         input = 3;
4320       break;
4321     case CL_RA:
4322       if (output == 1)
4323         input = 3;
4324       break;
4325     case CL_INTENSITY:
4326     case CL_LUMINANCE:
4327       input = 0;
4328       break;
4329     default:
4330       ReturnError(command_queue->context, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
4331     }
4332 
4333     // Interpret data
4334     switch (img->format.image_channel_data_type)
4335     {
4336     case CL_SNORM_INT8:
4337       ((int8_t*)color)[output] =
4338         rint(min(max(((float*)fill_color)[input] * 127.f, -127.f), 128.f));
4339       break;
4340     case CL_UNORM_INT8:
4341       ((uint8_t*)color)[output] =
4342         rint(min(max(((float*)fill_color)[input] * 255.f, 0.f), 255.f));
4343       break;
4344     case CL_SNORM_INT16:
4345       ((int16_t*)color)[output] = rint(
4346         min(max(((float*)fill_color)[input] * 32767.f, -32768.f), 32767.f));
4347       break;
4348     case CL_UNORM_INT16:
4349       ((uint16_t*)color)[output] =
4350         rint(min(max(((float*)fill_color)[input] * 65535.f, 0.f), 65535.f));
4351       break;
4352     case CL_FLOAT:
4353       ((float*)color)[output] = ((float*)fill_color)[input];
4354       break;
4355     case CL_HALF_FLOAT:
4356       ((uint16_t*)color)[output] =
4357         cl_half_from_float(((float*)fill_color)[input], CL_HALF_RTE);
4358       break;
4359     case CL_SIGNED_INT8:
4360       ((int8_t*)color)[output] = ((int32_t*)fill_color)[input];
4361       break;
4362     case CL_SIGNED_INT16:
4363       ((int16_t*)color)[output] = ((int32_t*)fill_color)[input];
4364       break;
4365     case CL_SIGNED_INT32:
4366       ((int32_t*)color)[output] = ((int32_t*)fill_color)[input];
4367       break;
4368     case CL_UNSIGNED_INT8:
4369       ((uint8_t*)color)[output] = ((uint32_t*)fill_color)[input];
4370       break;
4371     case CL_UNSIGNED_INT16:
4372       ((uint16_t*)color)[output] = ((uint32_t*)fill_color)[input];
4373       break;
4374     case CL_UNSIGNED_INT32:
4375       ((uint32_t*)color)[output] = ((uint32_t*)fill_color)[input];
4376       break;
4377     default:
4378       ReturnError(command_queue->context, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
4379     }
4380   }
4381 
4382   // Enqueue command
4383   oclgrind::FillImageCommand* cmd = new oclgrind::FillImageCommand(
4384     image->address, origin, region, row_pitch, slice_pitch, pixelSize, color);
4385   asyncQueueRetain(cmd, image);
4386   asyncEnqueue(command_queue, CL_COMMAND_FILL_IMAGE, cmd,
4387                num_events_in_wait_list, event_wait_list, event);
4388   delete[] color;
4389 
4390   return CL_SUCCESS;
4391 }
4392 
clEnqueueReadImage(cl_command_queue command_queue,cl_mem image,cl_bool blocking_read,const size_t * origin,const size_t * region,size_t row_pitch,size_t slice_pitch,void * ptr,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4393 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImage(
4394   cl_command_queue command_queue, cl_mem image, cl_bool blocking_read,
4395   const size_t* origin, const size_t* region, size_t row_pitch,
4396   size_t slice_pitch, void* ptr, cl_uint num_events_in_wait_list,
4397   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
4398 {
4399   REGISTER_API;
4400 
4401   // Check parameters
4402   if (!command_queue)
4403   {
4404     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4405   }
4406   if (!image)
4407   {
4408     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image);
4409   }
4410 
4411   cl_image* img = (cl_image*)image;
4412 
4413   size_t pixelSize = getPixelSize(&img->format);
4414   size_t buffer_origin[3] = {origin[0] * pixelSize, origin[1], origin[2]};
4415   size_t pixel_region[3] = {region[0] * pixelSize, region[1], region[2]};
4416   size_t host_origin[3] = {0, 0, 0};
4417 
4418   size_t img_row_pitch = img->desc.image_width * pixelSize;
4419   size_t img_slice_pitch = img->desc.image_height * img_row_pitch;
4420   if (row_pitch == 0)
4421   {
4422     row_pitch = pixel_region[0];
4423   }
4424   if (slice_pitch == 0)
4425   {
4426     slice_pitch = pixel_region[1] * row_pitch;
4427   }
4428 
4429   // Enqueue read
4430   cl_int ret = clEnqueueReadBufferRect(
4431     command_queue, image, blocking_read, buffer_origin, host_origin,
4432     pixel_region, img_row_pitch, img_slice_pitch, row_pitch, slice_pitch, ptr,
4433     num_events_in_wait_list, event_wait_list, event);
4434   if (event && ret == CL_SUCCESS)
4435   {
4436     (*event)->type = CL_COMMAND_READ_IMAGE;
4437   }
4438   return ret;
4439 }
4440 
clEnqueueWriteImage(cl_command_queue command_queue,cl_mem image,cl_bool blocking_write,const size_t * origin,const size_t * region,size_t input_row_pitch,size_t input_slice_pitch,const void * ptr,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4441 CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImage(
4442   cl_command_queue command_queue, cl_mem image, cl_bool blocking_write,
4443   const size_t* origin, const size_t* region, size_t input_row_pitch,
4444   size_t input_slice_pitch, const void* ptr, cl_uint num_events_in_wait_list,
4445   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
4446 {
4447   REGISTER_API;
4448 
4449   // Check parameters
4450   if (!command_queue)
4451   {
4452     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4453   }
4454   if (!image)
4455   {
4456     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image);
4457   }
4458 
4459   cl_image* img = (cl_image*)image;
4460 
4461   size_t pixelSize = getPixelSize(&img->format);
4462   size_t buffer_origin[3] = {origin[0] * pixelSize, origin[1], origin[2]};
4463   size_t pixel_region[3] = {region[0] * pixelSize, region[1], region[2]};
4464   size_t host_origin[3] = {0, 0, 0};
4465 
4466   size_t img_row_pitch = img->desc.image_width * pixelSize;
4467   size_t img_slice_pitch = img->desc.image_height * img_row_pitch;
4468   if (input_row_pitch == 0)
4469   {
4470     input_row_pitch = pixel_region[0];
4471   }
4472   if (input_slice_pitch == 0)
4473   {
4474     input_slice_pitch = pixel_region[1] * input_row_pitch;
4475   }
4476 
4477   // Enqueue write
4478   cl_int ret = clEnqueueWriteBufferRect(
4479     command_queue, image, blocking_write, buffer_origin, host_origin,
4480     pixel_region, img_row_pitch, img_slice_pitch, input_row_pitch,
4481     input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event);
4482   if (event && ret == CL_SUCCESS)
4483   {
4484     (*event)->type = CL_COMMAND_WRITE_IMAGE;
4485   }
4486   return ret;
4487 }
4488 
clEnqueueCopyImage(cl_command_queue command_queue,cl_mem src_image,cl_mem dst_image,const size_t * src_origin,const size_t * dst_origin,const size_t * region,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4489 CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImage(
4490   cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image,
4491   const size_t* src_origin, const size_t* dst_origin, const size_t* region,
4492   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
4493   cl_event* event) CL_API_SUFFIX__VERSION_1_0
4494 {
4495   REGISTER_API;
4496 
4497   // Check parameters
4498   if (!command_queue)
4499   {
4500     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4501   }
4502   if (!src_image)
4503   {
4504     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_image);
4505   }
4506   if (!dst_image)
4507   {
4508     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_image);
4509   }
4510 
4511   cl_image* src = (cl_image*)src_image;
4512   cl_image* dst = (cl_image*)dst_image;
4513   if (src->format.image_channel_order != dst->format.image_channel_order)
4514   {
4515     ReturnErrorInfo(command_queue->context, CL_IMAGE_FORMAT_MISMATCH,
4516                     "Channel orders do not match");
4517   }
4518   if (src->format.image_channel_data_type !=
4519       dst->format.image_channel_data_type)
4520   {
4521     ReturnErrorInfo(command_queue->context, CL_IMAGE_FORMAT_MISMATCH,
4522                     "Channel data types do no match");
4523   }
4524 
4525   size_t srcPixelSize = getPixelSize(&src->format);
4526   size_t dstPixelSize = getPixelSize(&dst->format);
4527 
4528   size_t src_pixel_origin[3] = {src_origin[0] * srcPixelSize, src_origin[1],
4529                                 src_origin[2]};
4530   size_t dst_pixel_origin[3] = {dst_origin[0] * dstPixelSize, dst_origin[1],
4531                                 dst_origin[2]};
4532   size_t pixel_region[3] = {region[0] * srcPixelSize, region[1], region[2]};
4533 
4534   size_t src_row_pitch = src->desc.image_width * srcPixelSize;
4535   size_t src_slice_pitch = src->desc.image_height * src_row_pitch;
4536   size_t dst_row_pitch = dst->desc.image_width * dstPixelSize;
4537   size_t dst_slice_pitch = dst->desc.image_height * dst_row_pitch;
4538 
4539   // Enqueue copy
4540   cl_int ret = clEnqueueCopyBufferRect(
4541     command_queue, src_image, dst_image, src_pixel_origin, dst_pixel_origin,
4542     pixel_region, src_row_pitch, src_slice_pitch, dst_row_pitch,
4543     dst_slice_pitch, num_events_in_wait_list, event_wait_list, event);
4544   if (event && ret == CL_SUCCESS)
4545   {
4546     (*event)->type = CL_COMMAND_COPY_IMAGE;
4547   }
4548   return ret;
4549 }
4550 
clEnqueueCopyImageToBuffer(cl_command_queue command_queue,cl_mem src_image,cl_mem dst_buffer,const size_t * src_origin,const size_t * region,size_t dst_offset,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4551 CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBuffer(
4552   cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer,
4553   const size_t* src_origin, const size_t* region, size_t dst_offset,
4554   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
4555   cl_event* event) CL_API_SUFFIX__VERSION_1_0
4556 {
4557   REGISTER_API;
4558 
4559   // Check parameters
4560   if (!command_queue)
4561   {
4562     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4563   }
4564   if (!src_image)
4565   {
4566     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_image);
4567   }
4568   if (!dst_buffer)
4569   {
4570     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_buffer);
4571   }
4572 
4573   cl_image* src = (cl_image*)src_image;
4574   size_t pixel_size = getPixelSize(&src->format);
4575   size_t src_pixel_origin[3] = {src_origin[0] * pixel_size, src_origin[1],
4576                                 src_origin[2]};
4577   size_t src_row_pitch = src->desc.image_width * pixel_size;
4578   size_t src_slice_pitch = src->desc.image_height * src_row_pitch;
4579 
4580   size_t pixel_region[3] = {region[0] * pixel_size, region[1], region[2]};
4581   size_t dst_origin[3] = {dst_offset, 0, 0};
4582 
4583   // Enqueue copy
4584   cl_int ret = clEnqueueCopyBufferRect(
4585     command_queue, src_image, dst_buffer, src_pixel_origin, dst_origin,
4586     pixel_region, src_row_pitch, src_slice_pitch, 0, 0, num_events_in_wait_list,
4587     event_wait_list, event);
4588   if (event && ret == CL_SUCCESS)
4589   {
4590     (*event)->type = CL_COMMAND_COPY_IMAGE_TO_BUFFER;
4591   }
4592   return ret;
4593 }
4594 
clEnqueueCopyBufferToImage(cl_command_queue command_queue,cl_mem src_buffer,cl_mem dst_image,size_t src_offset,const size_t * dst_origin,const size_t * region,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4595 CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferToImage(
4596   cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image,
4597   size_t src_offset, const size_t* dst_origin, const size_t* region,
4598   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
4599   cl_event* event) CL_API_SUFFIX__VERSION_1_0
4600 {
4601   REGISTER_API;
4602 
4603   // Check parameters
4604   if (!command_queue)
4605   {
4606     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4607   }
4608   if (!src_buffer)
4609   {
4610     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_buffer);
4611   }
4612   if (!dst_image)
4613   {
4614     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_image);
4615   }
4616 
4617   cl_image* dst = (cl_image*)dst_image;
4618   size_t pixel_size = getPixelSize(&dst->format);
4619   size_t dst_pixel_origin[3] = {dst_origin[0] * pixel_size, dst_origin[1],
4620                                 dst_origin[2]};
4621   size_t dst_row_pitch = dst->desc.image_width * pixel_size;
4622   size_t dst_slice_pitch = dst->desc.image_height * dst_row_pitch;
4623 
4624   size_t pixel_region[3] = {region[0] * pixel_size, region[1], region[2]};
4625   size_t src_origin[3] = {src_offset, 0, 0};
4626 
4627   // Enqueue copy
4628   cl_int ret = clEnqueueCopyBufferRect(
4629     command_queue, src_buffer, dst_image, src_origin, dst_pixel_origin,
4630     pixel_region, 0, 0, dst_row_pitch, dst_slice_pitch, num_events_in_wait_list,
4631     event_wait_list, event);
4632   if (event && ret == CL_SUCCESS)
4633   {
4634     (*event)->type = CL_COMMAND_COPY_BUFFER_TO_IMAGE;
4635   }
4636   return ret;
4637 }
4638 
clEnqueueMapBuffer(cl_command_queue command_queue,cl_mem buffer,cl_bool blocking_map,cl_map_flags map_flags,size_t offset,size_t cb,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event,cl_int * errcode_ret)4639 CL_API_ENTRY void* CL_API_CALL clEnqueueMapBuffer(
4640   cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map,
4641   cl_map_flags map_flags, size_t offset, size_t cb,
4642   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
4643   cl_event* event, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
4644 {
4645   REGISTER_API;
4646 
4647   // Check parameters
4648   if (!command_queue)
4649   {
4650     SetErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4651     return NULL;
4652   }
4653   if (!buffer)
4654   {
4655     SetErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer);
4656     return NULL;
4657   }
4658   if (map_flags & CL_MAP_WRITE &&
4659       buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY))
4660   {
4661     SetErrorInfo(command_queue->context, CL_INVALID_OPERATION,
4662                  "Buffer flags specify host will not write data");
4663     return NULL;
4664   }
4665   if (map_flags & CL_MAP_READ &&
4666       buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY))
4667   {
4668     SetErrorInfo(command_queue->context, CL_INVALID_OPERATION,
4669                  "Buffer flags specify host will not read data");
4670     return NULL;
4671   }
4672 
4673   // Check map region
4674   if (offset + cb > buffer->size)
4675   {
4676     SetErrorInfo(command_queue->context, CL_INVALID_VALUE,
4677                  "offset + cb (" << offset << " + " << cb
4678                                  << ") exceeds buffer size (" << buffer->size
4679                                  << " bytes)");
4680     return NULL;
4681   }
4682 
4683   // Map buffer
4684   void* ptr = buffer->context->context->getGlobalMemory()->mapBuffer(
4685     buffer->address, offset, cb);
4686   if (ptr == NULL)
4687   {
4688     SetError(command_queue->context, CL_INVALID_VALUE);
4689     return NULL;
4690   }
4691 
4692   // Enqueue command
4693   oclgrind::MapCommand* cmd = new oclgrind::MapCommand();
4694   cmd->address = buffer->address;
4695   cmd->offset = offset;
4696   cmd->size = cb;
4697   cmd->flags = map_flags;
4698   asyncQueueRetain(cmd, buffer);
4699   asyncEnqueue(command_queue, CL_COMMAND_MAP_BUFFER, cmd,
4700                num_events_in_wait_list, event_wait_list, event);
4701 
4702   SetError(command_queue->context, CL_SUCCESS);
4703   if (blocking_map)
4704   {
4705     SetError(command_queue->context, clFinish(command_queue));
4706   }
4707 
4708   return ptr;
4709 }
4710 
clEnqueueMapImage(cl_command_queue command_queue,cl_mem image,cl_bool blocking_map,cl_map_flags map_flags,const size_t * origin,const size_t * region,size_t * image_row_pitch,size_t * image_slice_pitch,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event,cl_int * errcode_ret)4711 CL_API_ENTRY void* CL_API_CALL clEnqueueMapImage(
4712   cl_command_queue command_queue, cl_mem image, cl_bool blocking_map,
4713   cl_map_flags map_flags, const size_t* origin, const size_t* region,
4714   size_t* image_row_pitch, size_t* image_slice_pitch,
4715   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
4716   cl_event* event, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
4717 {
4718   REGISTER_API;
4719 
4720   // Check parameters
4721   if (!command_queue)
4722   {
4723     SetErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4724     return NULL;
4725   }
4726   if (!image)
4727   {
4728     SetErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image);
4729     return NULL;
4730   }
4731   if (!image_row_pitch)
4732   {
4733     SetErrorArg(command_queue->context, CL_INVALID_VALUE, image_row_pitch);
4734     return NULL;
4735   }
4736   if (map_flags & CL_MAP_WRITE &&
4737       image->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY))
4738   {
4739     SetErrorInfo(command_queue->context, CL_INVALID_OPERATION,
4740                  "Image flags specify host will not write data");
4741     return NULL;
4742   }
4743   if (map_flags & CL_MAP_READ &&
4744       image->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY))
4745   {
4746     SetErrorInfo(command_queue->context, CL_INVALID_OPERATION,
4747                  "Image flags specify host will not read data");
4748     return NULL;
4749   }
4750   if (!region[0] || !region[1] || !region[2])
4751   {
4752     SetErrorInfo(command_queue->context, CL_INVALID_VALUE,
4753                  "Values in region cannot be 0");
4754   }
4755 
4756   // Get image dimensions
4757   cl_image* img = (cl_image*)image;
4758   size_t width = img->desc.image_width;
4759   size_t height = img->desc.image_height;
4760   size_t depth = img->desc.image_depth;
4761   size_t arraySize = img->desc.image_array_size;
4762   size_t pixelSize = getPixelSize(&img->format);
4763   size_t row_pitch = width * pixelSize;
4764   size_t slice_pitch = height * row_pitch;
4765 
4766   if (img->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
4767     height = arraySize;
4768   if (img->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
4769     depth = arraySize;
4770 
4771   // Ensure region is within image bounds
4772   if (origin[0] + region[0] > width)
4773   {
4774     SetErrorInfo(command_queue->context, CL_INVALID_VALUE,
4775                  "origin[0] + region[0] > width ("
4776                    << origin[0] << " + " << region[0] << " > " << width
4777                    << " )");
4778   }
4779   if (origin[1] + region[1] > height)
4780   {
4781     SetErrorInfo(command_queue->context, CL_INVALID_VALUE,
4782                  "origin[1] + region[1] > height ("
4783                    << origin[1] << " + " << region[1] << " > " << height
4784                    << " )");
4785   }
4786   if (origin[2] + region[2] > depth)
4787   {
4788     SetErrorInfo(command_queue->context, CL_INVALID_VALUE,
4789                  "origin[2] + region[2] > depth ("
4790                    << origin[2] << " + " << region[2] << " > " << depth
4791                    << " )");
4792   }
4793 
4794   // Compute byte offset and size
4795   size_t offset =
4796     origin[0] * pixelSize + origin[1] * row_pitch + origin[2] * slice_pitch;
4797   size_t size = region[0] * pixelSize + (region[1] - 1) * row_pitch +
4798                 (region[2] - 1) * slice_pitch;
4799 
4800   // Map image
4801   void* ptr = image->context->context->getGlobalMemory()->mapBuffer(
4802     image->address, offset, size);
4803   if (ptr == NULL)
4804   {
4805     SetError(command_queue->context, CL_INVALID_VALUE);
4806     return NULL;
4807   }
4808 
4809   *image_row_pitch = row_pitch;
4810   if (image_slice_pitch)
4811   {
4812     *image_slice_pitch = slice_pitch;
4813   }
4814 
4815   // Enqueue command
4816   oclgrind::MapCommand* cmd = new oclgrind::MapCommand();
4817   cmd->address = image->address;
4818   cmd->offset = offset;
4819   cmd->size = size;
4820   cmd->flags = map_flags;
4821   asyncQueueRetain(cmd, image);
4822   asyncEnqueue(command_queue, CL_COMMAND_MAP_IMAGE, cmd,
4823                num_events_in_wait_list, event_wait_list, event);
4824 
4825   SetError(command_queue->context, CL_SUCCESS);
4826   if (blocking_map)
4827   {
4828     SetError(command_queue->context, clFinish(command_queue));
4829   }
4830 
4831   return ptr;
4832 }
4833 
clEnqueueUnmapMemObject(cl_command_queue command_queue,cl_mem memobj,void * mapped_ptr,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4834 CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObject(
4835   cl_command_queue command_queue, cl_mem memobj, void* mapped_ptr,
4836   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
4837   cl_event* event) CL_API_SUFFIX__VERSION_1_0
4838 {
4839   REGISTER_API;
4840 
4841   // Check parameters
4842   if (!command_queue)
4843   {
4844     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4845   }
4846   if (!memobj)
4847   {
4848     ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj);
4849   }
4850   if (!mapped_ptr)
4851   {
4852     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, mapped_ptr);
4853   }
4854 
4855   // Enqueue command
4856   oclgrind::UnmapCommand* cmd = new oclgrind::UnmapCommand();
4857   cmd->address = memobj->address;
4858   cmd->ptr = mapped_ptr;
4859   asyncQueueRetain(cmd, memobj);
4860   asyncEnqueue(command_queue, CL_COMMAND_UNMAP_MEM_OBJECT, cmd,
4861                num_events_in_wait_list, event_wait_list, event);
4862 
4863   return CL_SUCCESS;
4864 }
4865 
clEnqueueMigrateMemObjects(cl_command_queue command_queue,cl_uint num_mem_objects,const cl_mem * mem_objects,cl_mem_migration_flags flags,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4866 CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjects(
4867   cl_command_queue command_queue, cl_uint num_mem_objects,
4868   const cl_mem* mem_objects, cl_mem_migration_flags flags,
4869   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
4870   cl_event* event) CL_API_SUFFIX__VERSION_1_2
4871 {
4872   REGISTER_API;
4873 
4874   // Check parameters
4875   if (!command_queue)
4876   {
4877     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4878   }
4879 
4880   // Enqueue command
4881   oclgrind::Command* cmd = new oclgrind::Command();
4882   asyncEnqueue(command_queue, CL_COMMAND_MIGRATE_MEM_OBJECTS, cmd,
4883                num_events_in_wait_list, event_wait_list, event);
4884 
4885   return CL_SUCCESS;
4886 }
4887 
clEnqueueNDRangeKernel(cl_command_queue command_queue,cl_kernel kernel,cl_uint work_dim,const size_t * global_work_offset,const size_t * global_work_size,const size_t * local_work_size,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)4888 CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(
4889   cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim,
4890   const size_t* global_work_offset, const size_t* global_work_size,
4891   const size_t* local_work_size, cl_uint num_events_in_wait_list,
4892   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
4893 {
4894   REGISTER_API;
4895 
4896   // Check parameters
4897   if (!command_queue)
4898   {
4899     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
4900   }
4901   if (work_dim < 1 || work_dim > 3)
4902   {
4903     ReturnErrorInfo(
4904       command_queue->context, CL_INVALID_WORK_DIMENSION,
4905       "Kernels must be 1, 2 or 3 dimensional (work_dim = " << work_dim << ")");
4906   }
4907   if (!global_work_size)
4908   {
4909     ReturnErrorInfo(command_queue->context, CL_INVALID_GLOBAL_WORK_SIZE,
4910                     "global_work_size cannot be NULL");
4911   }
4912 
4913   // Check global and local sizes are valid
4914   size_t reqdWorkGroupSize[3];
4915   size_t totalWGSize = 1;
4916   kernel->kernel->getRequiredWorkGroupSize(reqdWorkGroupSize);
4917   for (unsigned i = 0; i < work_dim; i++)
4918   {
4919     if (kernel->kernel->requiresUniformWorkGroups() && local_work_size &&
4920         global_work_size[i] % local_work_size[i])
4921     {
4922       ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_GROUP_SIZE,
4923                       "local_work_size[" << i << "]=" << local_work_size[i]
4924                                          << " does not divide global_work_size["
4925                                          << i << "]=" << global_work_size[i]);
4926     }
4927     if (local_work_size)
4928     {
4929       if (local_work_size[i] > m_device->maxWGSize)
4930       {
4931         ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_ITEM_SIZE,
4932                         "local_work_size[" << i << "]=" << local_work_size[i]
4933                                            << " exceeds device maximum of "
4934                                            << m_device->maxWGSize);
4935       }
4936       totalWGSize *= local_work_size[i];
4937     }
4938     if (local_work_size && reqdWorkGroupSize[i] &&
4939         local_work_size[i] != reqdWorkGroupSize[i])
4940     {
4941       ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_GROUP_SIZE,
4942                       "local_work_size["
4943                         << i << "]=" << local_work_size[i]
4944                         << " does not match reqd_work_group_size[" << i
4945                         << "]=" << reqdWorkGroupSize[i])
4946     }
4947   }
4948   if (totalWGSize > m_device->maxWGSize)
4949   {
4950     ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_GROUP_SIZE,
4951                     "total work-group size (" << totalWGSize
4952                                               << ")"
4953                                                  " exceeds device maximum of "
4954                                               << m_device->maxWGSize);
4955   }
4956 
4957   // Ensure all arguments have been set
4958   if (!kernel->kernel->allArgumentsSet())
4959   {
4960     ReturnErrorInfo(command_queue->context, CL_INVALID_KERNEL_ARGS,
4961                     "Not all kernel arguments set");
4962   }
4963 
4964   // Check that local memory requirement is within device maximum
4965   size_t totalLocal = kernel->kernel->getLocalMemorySize();
4966   if (totalLocal > m_device->localMemSize)
4967   {
4968     ReturnErrorInfo(command_queue->context, CL_OUT_OF_RESOURCES,
4969                     "total local memory size (" << totalLocal
4970                                                 << ")"
4971                                                    " exceeds device maximum of "
4972                                                 << m_device->localMemSize);
4973   }
4974 
4975   // Check that constant memory requirement is within device maximum
4976   size_t totalConstant = 0;
4977   std::map<cl_uint, cl_mem>::iterator arg;
4978   for (arg = kernel->memArgs.begin(); arg != kernel->memArgs.end(); arg++)
4979   {
4980     if (kernel->kernel->getArgumentAddressQualifier(arg->first) ==
4981         CL_KERNEL_ARG_ADDRESS_CONSTANT)
4982       totalConstant += arg->second->size;
4983   }
4984   if (totalConstant > m_device->constantMemSize)
4985   {
4986     ReturnErrorInfo(command_queue->context, CL_OUT_OF_RESOURCES,
4987                     "total constant memory size ("
4988                       << totalConstant
4989                       << ")"
4990                          " exceeds device maximum of "
4991                       << m_device->constantMemSize);
4992   }
4993 
4994   // Set-up offsets and sizes
4995   oclgrind::KernelCommand* cmd = new oclgrind::KernelCommand();
4996   cmd->kernel = new oclgrind::Kernel(*kernel->kernel);
4997   cmd->work_dim = work_dim;
4998   cmd->globalSize = oclgrind::Size3(1, 1, 1);
4999   cmd->globalOffset = oclgrind::Size3(0, 0, 0);
5000   cmd->localSize = oclgrind::Size3(1, 1, 1);
5001   memcpy(&cmd->globalSize, global_work_size, work_dim * sizeof(size_t));
5002   if (global_work_offset)
5003   {
5004     memcpy(&cmd->globalOffset, global_work_offset, work_dim * sizeof(size_t));
5005   }
5006   if (local_work_size)
5007   {
5008     memcpy(&cmd->localSize, local_work_size, work_dim * sizeof(size_t));
5009   }
5010 
5011   // Enqueue command
5012   asyncQueueRetain(cmd, kernel);
5013   asyncEnqueue(command_queue, CL_COMMAND_NDRANGE_KERNEL, cmd,
5014                num_events_in_wait_list, event_wait_list, event);
5015 
5016   return CL_SUCCESS;
5017 }
5018 
5019 CL_API_ENTRY cl_int CL_API_CALL
clEnqueueTask(cl_command_queue command_queue,cl_kernel kernel,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5020 clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel,
5021               cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
5022               cl_event* event) CL_API_SUFFIX__VERSION_1_0
5023 {
5024   REGISTER_API;
5025 
5026   size_t work = 1;
5027   return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &work, &work,
5028                                 num_events_in_wait_list, event_wait_list,
5029                                 event);
5030 }
5031 
clEnqueueNativeKernel(cl_command_queue command_queue,void (CL_CALLBACK * user_func)(void *),void * args,size_t cb_args,cl_uint num_mem_objects,const cl_mem * mem_list,const void ** args_mem_loc,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5032 CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernel(
5033   cl_command_queue command_queue, void(CL_CALLBACK* user_func)(void*),
5034   void* args, size_t cb_args, cl_uint num_mem_objects, const cl_mem* mem_list,
5035   const void** args_mem_loc, cl_uint num_events_in_wait_list,
5036   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
5037 {
5038   REGISTER_API;
5039 
5040   // Check parameters
5041   if (!command_queue)
5042   {
5043     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
5044   }
5045   if (!user_func)
5046   {
5047     ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, user_func);
5048   }
5049   if (!args && (cb_args > 0 || num_mem_objects > 0))
5050   {
5051     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
5052                     "args is NULL but cb_args|num_mem_objects >0");
5053   }
5054   if (args && cb_args == 0)
5055   {
5056     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
5057                     "args is non-NULL but cb_args is 0");
5058   }
5059   if (num_mem_objects > 0 && (!mem_list || !args_mem_loc))
5060   {
5061     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
5062                     "num_mem_objects >0 but mem_list|args_mem_loc is NULL");
5063   }
5064   if (num_mem_objects == 0 && (mem_list || args_mem_loc))
5065   {
5066     ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
5067                     "num_mem_objects is 0 but mem_list|args_mem_loc not NULL");
5068   }
5069 
5070   // Replace mem objects with real pointers
5071   oclgrind::Memory* memory = command_queue->context->context->getGlobalMemory();
5072   for (unsigned i = 0; i < num_mem_objects; i++)
5073   {
5074     if (!mem_list[i])
5075     {
5076       ReturnErrorInfo(command_queue->context, CL_INVALID_MEM_OBJECT,
5077                       "Memory object " << i << " is NULL");
5078     }
5079 
5080     void* addr = memory->getPointer(mem_list[i]->address);
5081     if (addr == NULL)
5082     {
5083       ReturnErrorInfo(command_queue->context, CL_INVALID_MEM_OBJECT,
5084                       "Memory object " << i << " not valid");
5085     }
5086     memcpy((void*)args_mem_loc[i], &addr, sizeof(void*));
5087   }
5088 
5089   // Create command
5090   oclgrind::NativeKernelCommand* cmd =
5091     new oclgrind::NativeKernelCommand(user_func, args, cb_args);
5092 
5093   // Retain memory objects
5094   for (unsigned i = 0; i < num_mem_objects; i++)
5095   {
5096     asyncQueueRetain(cmd, mem_list[i]);
5097   }
5098 
5099   // Enqueue commands
5100   asyncEnqueue(command_queue, CL_COMMAND_NATIVE_KERNEL, cmd,
5101                num_events_in_wait_list, event_wait_list, event);
5102 
5103   return CL_SUCCESS;
5104 }
5105 
clGetExtensionFunctionAddressForPlatform(cl_platform_id platform,const char * func_name)5106 CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddressForPlatform(
5107   cl_platform_id platform, const char* func_name) CL_API_SUFFIX__VERSION_1_2
5108 {
5109   REGISTER_API;
5110 
5111   return NULL;
5112 }
5113 
clEnqueueMarkerWithWaitList(cl_command_queue command_queue,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5114 CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitList(
5115   cl_command_queue command_queue, cl_uint num_events_in_wait_list,
5116   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2
5117 {
5118   REGISTER_API;
5119 
5120   // Check parameters
5121   if (!command_queue)
5122   {
5123     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
5124   }
5125 
5126   // Enqueue command
5127   oclgrind::Command* cmd = new oclgrind::Command();
5128   asyncEnqueue(command_queue, CL_COMMAND_MARKER, cmd, num_events_in_wait_list,
5129                event_wait_list, event);
5130 
5131   return CL_SUCCESS;
5132 }
5133 
clEnqueueBarrierWithWaitList(cl_command_queue command_queue,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5134 CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitList(
5135   cl_command_queue command_queue, cl_uint num_events_in_wait_list,
5136   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2
5137 {
5138   REGISTER_API;
5139 
5140   // Check parameters
5141   if (!command_queue)
5142   {
5143     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
5144   }
5145 
5146   // Enqueue command
5147   oclgrind::Command* cmd = new oclgrind::Command();
5148   asyncEnqueue(command_queue, CL_COMMAND_BARRIER, cmd, num_events_in_wait_list,
5149                event_wait_list, event);
5150 
5151   return CL_SUCCESS;
5152 }
5153 
clSetPrintfCallback(cl_context context,void (CL_CALLBACK * pfn_notify)(cl_context,cl_uint,char *,void *),void * user_data)5154 CL_API_ENTRY cl_int CL_API_CALL clSetPrintfCallback(
5155   cl_context context,
5156   void(CL_CALLBACK* pfn_notify)(cl_context, cl_uint, char*, void*),
5157   void* user_data) CL_API_SUFFIX__VERSION_1_2
5158 {
5159   REGISTER_API;
5160 
5161   ReturnError(NULL, CL_INVALID_OPERATION);
5162 }
5163 
clEnqueueMarker(cl_command_queue command_queue,cl_event * event)5164 CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarker(
5165   cl_command_queue command_queue, cl_event* event) CL_API_SUFFIX__VERSION_1_0
5166 {
5167   REGISTER_API;
5168 
5169   return clEnqueueMarkerWithWaitList(command_queue, 0, NULL, event);
5170 }
5171 
5172 CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWaitForEvents(cl_command_queue command_queue,cl_uint num_events,const cl_event * event_list)5173 clEnqueueWaitForEvents(cl_command_queue command_queue, cl_uint num_events,
5174                        const cl_event* event_list) CL_API_SUFFIX__VERSION_1_0
5175 {
5176   REGISTER_API;
5177 
5178   if (!command_queue)
5179   {
5180     ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
5181   }
5182 
5183   // Enqueue command
5184   oclgrind::Command* cmd = new oclgrind::Command();
5185   asyncEnqueue(command_queue, CL_COMMAND_BARRIER, cmd, num_events, event_list,
5186                NULL);
5187 
5188   return CL_SUCCESS;
5189 }
5190 
clEnqueueBarrier(cl_command_queue command_queue)5191 CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrier(cl_command_queue command_queue)
5192   CL_API_SUFFIX__VERSION_1_0
5193 {
5194   REGISTER_API;
5195 
5196   return clEnqueueBarrierWithWaitList(command_queue, 0, NULL, NULL);
5197 }
5198 
clCreateFromGLBuffer(cl_context context,cl_mem_flags flags,cl_GLuint bufret_mem,int * errcode_ret)5199 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLBuffer(
5200   cl_context context, cl_mem_flags flags, cl_GLuint bufret_mem,
5201   int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5202 {
5203   REGISTER_API;
5204 
5205   SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented");
5206   return NULL;
5207 }
5208 
clCreateFromGLTexture(cl_context context,cl_mem_flags flags,cl_GLenum target,cl_GLint miplevel,cl_GLuint texture,cl_int * errcode_ret)5209 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture(
5210   cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel,
5211   cl_GLuint texture, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2
5212 {
5213   REGISTER_API;
5214 
5215   SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented");
5216   return NULL;
5217 }
5218 
clCreateFromGLTexture2D(cl_context context,cl_mem_flags flags,cl_GLenum target,cl_GLint miplevel,cl_GLuint texture,cl_int * errcode_ret)5219 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture2D(
5220   cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel,
5221   cl_GLuint texture, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5222 {
5223   REGISTER_API;
5224 
5225   SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented");
5226   return NULL;
5227 }
5228 
clCreateFromGLTexture3D(cl_context context,cl_mem_flags flags,cl_GLenum target,cl_GLint miplevel,cl_GLuint texture,cl_int * errcode_ret)5229 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture3D(
5230   cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel,
5231   cl_GLuint texture, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5232 {
5233   REGISTER_API;
5234 
5235   SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented");
5236   return NULL;
5237 }
5238 
clCreateFromGLRenderbuffer(cl_context context,cl_mem_flags flags,cl_GLuint renderbuffer,cl_int * errcode_ret)5239 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLRenderbuffer(
5240   cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer,
5241   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5242 {
5243   REGISTER_API;
5244 
5245   SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented");
5246   return NULL;
5247 }
5248 
5249 CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem memobj,cl_gl_object_type * gl_object_type,cl_GLuint * gl_object_name)5250 clGetGLObjectInfo(cl_mem memobj, cl_gl_object_type* gl_object_type,
5251                   cl_GLuint* gl_object_name) CL_API_SUFFIX__VERSION_1_0
5252 {
5253   REGISTER_API;
5254 
5255   ReturnErrorInfo(NULL, CL_INVALID_MEM_OBJECT, "CL/GL interop not implements");
5256 }
5257 
clGetGLTextureInfo(cl_mem memobj,cl_gl_texture_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)5258 CL_API_ENTRY cl_int CL_API_CALL clGetGLTextureInfo(
5259   cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size,
5260   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
5261 {
5262   REGISTER_API;
5263 
5264   ReturnErrorInfo(NULL, CL_INVALID_MEM_OBJECT, "CL/GL interop not implemented");
5265 }
5266 
clEnqueueAcquireGLObjects(cl_command_queue command_queue,cl_uint num_objects,const cl_mem * mem_objects,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5267 CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireGLObjects(
5268   cl_command_queue command_queue, cl_uint num_objects,
5269   const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
5270   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
5271 {
5272   REGISTER_API;
5273 
5274   ReturnErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented");
5275 }
5276 
clEnqueueReleaseGLObjects(cl_command_queue command_queue,cl_uint num_objects,const cl_mem * mem_objects,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5277 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseGLObjects(
5278   cl_command_queue command_queue, cl_uint num_objects,
5279   const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
5280   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
5281 {
5282   REGISTER_API;
5283 
5284   ReturnErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented");
5285 }
5286 
clGetGLContextInfoKHR(const cl_context_properties * properties,cl_gl_context_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)5287 CL_API_ENTRY cl_int CL_API_CALL clGetGLContextInfoKHR(
5288   const cl_context_properties* properties, cl_gl_context_info param_name,
5289   size_t param_value_size, void* param_value,
5290   size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
5291 {
5292   REGISTER_API;
5293 
5294   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/GL interop not implemented");
5295 }
5296 
5297 CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context context,cl_GLsync cl_GLsync,cl_int * errcode_ret)5298 clCreateEventFromGLsyncKHR(cl_context context, cl_GLsync cl_GLsync,
5299                            cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1
5300 {
5301   REGISTER_API;
5302 
5303   SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented");
5304   return NULL;
5305 }
5306 
5307 #if defined(_WIN32) && !defined(__MINGW32__) // DX extension functions
5308 
clGetDeviceIDsFromD3D10KHR(cl_platform_id platform,cl_d3d10_device_source_khr d3d_device_source,void * d3d_object,cl_d3d10_device_set_khr d3d_device_set,cl_uint num_entries,cl_device_id * devices,cl_uint * num_devices)5309 CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR(
5310   cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source,
5311   void* d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries,
5312   cl_device_id* devices, cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_0
5313 {
5314   REGISTER_API;
5315 
5316   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5317 }
5318 
clCreateFromD3D10BufferKHR(cl_context context,cl_mem_flags flags,ID3D10Buffer * resource,cl_int * errcode_ret)5319 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10BufferKHR(
5320   cl_context context, cl_mem_flags flags, ID3D10Buffer* resource,
5321   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5322 {
5323   REGISTER_API;
5324 
5325   SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/DX interop not implemented");
5326   return NULL;
5327 }
5328 
clCreateFromD3D10Texture2DKHR(cl_context context,cl_mem_flags flags,ID3D10Texture2D * resource,UINT subresource,cl_int * errcode_ret)5329 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR(
5330   cl_context context, cl_mem_flags flags, ID3D10Texture2D* resource,
5331   UINT subresource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5332 {
5333   REGISTER_API;
5334 
5335   SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5336   return NULL;
5337 }
5338 
clCreateFromD3D10Texture3DKHR(cl_context context,cl_mem_flags flags,ID3D10Texture3D * resource,UINT subresource,cl_int * errcode_ret)5339 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR(
5340   cl_context context, cl_mem_flags flags, ID3D10Texture3D* resource,
5341   UINT subresource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5342 {
5343   REGISTER_API;
5344 
5345   SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5346   return NULL;
5347 }
5348 
clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue command_queue,cl_uint num_objects,const cl_mem * mem_objects,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5349 CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR(
5350   cl_command_queue command_queue, cl_uint num_objects,
5351   const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
5352   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
5353 {
5354   REGISTER_API;
5355 
5356   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5357 }
5358 
clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue command_queue,cl_uint num_objects,const cl_mem * mem_objects,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5359 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR(
5360   cl_command_queue command_queue, cl_uint num_objects,
5361   const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
5362   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
5363 {
5364   REGISTER_API;
5365 
5366   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5367 }
5368 
clGetDeviceIDsFromD3D11KHR(cl_platform_id platform,cl_d3d11_device_source_khr d3d_device_source,void * d3d_object,cl_d3d11_device_set_khr d3d_device_set,cl_uint num_entries,cl_device_id * devices,cl_uint * num_devices)5369 CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR(
5370   cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source,
5371   void* d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries,
5372   cl_device_id* devices, cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_0
5373 {
5374   REGISTER_API;
5375 
5376   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5377 }
5378 
clCreateFromD3D11BufferKHR(cl_context context,cl_mem_flags flags,ID3D11Buffer * resource,cl_int * errcode_ret)5379 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11BufferKHR(
5380   cl_context context, cl_mem_flags flags, ID3D11Buffer* resource,
5381   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5382 {
5383   REGISTER_API;
5384 
5385   SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/DX interop not implemented");
5386   return NULL;
5387 }
5388 
clCreateFromD3D11Texture2DKHR(cl_context context,cl_mem_flags flags,ID3D11Texture2D * resource,UINT subresource,cl_int * errcode_ret)5389 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR(
5390   cl_context context, cl_mem_flags flags, ID3D11Texture2D* resource,
5391   UINT subresource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5392 {
5393   REGISTER_API;
5394 
5395   SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5396   return NULL;
5397 }
5398 
clCreateFromD3D11Texture3DKHR(cl_context context,cl_mem_flags flags,ID3D11Texture3D * resource,UINT subresource,cl_int * errcode_ret)5399 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR(
5400   cl_context context, cl_mem_flags flags, ID3D11Texture3D* resource,
5401   UINT subresource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0
5402 {
5403   REGISTER_API;
5404 
5405   SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5406   return NULL;
5407 }
5408 
clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue command_queue,cl_uint num_objects,const cl_mem * mem_objects,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5409 CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR(
5410   cl_command_queue command_queue, cl_uint num_objects,
5411   const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
5412   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
5413 {
5414   REGISTER_API;
5415 
5416   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5417 }
5418 
clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue command_queue,cl_uint num_objects,const cl_mem * mem_objects,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5419 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR(
5420   cl_command_queue command_queue, cl_uint num_objects,
5421   const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
5422   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0
5423 {
5424   REGISTER_API;
5425 
5426   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5427 }
5428 
clGetDeviceIDsFromDX9MediaAdapterKHR(cl_platform_id platform,cl_uint num_media_adapters,cl_dx9_media_adapter_type_khr * media_adapter_type,void * media_adapters,cl_dx9_media_adapter_set_khr media_adapter_set,cl_uint num_entries,cl_device_id * devices,cl_uint * num_devices)5429 CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR(
5430   cl_platform_id platform, cl_uint num_media_adapters,
5431   cl_dx9_media_adapter_type_khr* media_adapter_type, void* media_adapters,
5432   cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries,
5433   cl_device_id* devices, cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2
5434 {
5435   REGISTER_API;
5436 
5437   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5438 }
5439 
clCreateFromDX9MediaSurfaceKHR(cl_context context,cl_mem_flags flags,cl_dx9_media_adapter_type_khr adapter_type,void * surface_info,cl_uint plane,cl_int * errcode_ret)5440 CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR(
5441   cl_context context, cl_mem_flags flags,
5442   cl_dx9_media_adapter_type_khr adapter_type, void* surface_info, cl_uint plane,
5443   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2
5444 {
5445   REGISTER_API;
5446 
5447   SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/DX interop not implemented");
5448   return NULL;
5449 }
5450 
clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue command_queue,cl_uint num_objects,const cl_mem * mem_objects,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5451 CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR(
5452   cl_command_queue command_queue, cl_uint num_objects,
5453   const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
5454   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2
5455 {
5456   REGISTER_API;
5457 
5458   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5459 }
5460 
clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue command_queue,cl_uint num_objects,const cl_mem * mem_objects,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5461 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR(
5462   cl_command_queue command_queue, cl_uint num_objects,
5463   const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
5464   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2
5465 {
5466   REGISTER_API;
5467 
5468   ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented");
5469 }
5470 
5471 #endif // DX extension functions
5472 
5473 /////////////////////
5474 // OpenCL 2.0 APIs //
5475 /////////////////////
5476 
clCreateCommandQueueWithProperties(cl_context context,cl_device_id device,const cl_queue_properties * properties,cl_int * errcode_ret)5477 CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(
5478   cl_context context, cl_device_id device,
5479   const cl_queue_properties* properties,
5480   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_0
5481 {
5482   REGISTER_API;
5483 
5484   // Check parameters
5485   if (!context)
5486   {
5487     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
5488     return NULL;
5489   }
5490   if (device != m_device)
5491   {
5492     SetErrorArg(context, CL_INVALID_DEVICE, device);
5493     return NULL;
5494   }
5495 
5496   // Parse properties
5497   cl_command_queue_properties props = 0;
5498   bool out_of_order = false;
5499   unsigned i = 0;
5500   while (properties && properties[i])
5501   {
5502     switch (properties[i++])
5503     {
5504     case CL_QUEUE_PROPERTIES:
5505       if (properties[i] & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
5506       {
5507         out_of_order = true;
5508       }
5509       if (properties[i] & (CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT))
5510       {
5511         SetErrorInfo(context, CL_INVALID_QUEUE_PROPERTIES,
5512                      "On device queues not implemented");
5513         return NULL;
5514       }
5515       props = properties[i];
5516       break;
5517     case CL_QUEUE_SIZE:
5518       SetErrorInfo(context, CL_INVALID_VALUE, "CL_QUEUE_SIZE not implemented");
5519       return NULL;
5520     default:
5521       SetErrorInfo(context, CL_INVALID_VALUE, properties);
5522       return NULL;
5523     }
5524     i++;
5525   }
5526   unsigned numProperties = i + 1;
5527 
5528   // Create command-queue object
5529   cl_command_queue queue;
5530   queue = new _cl_command_queue;
5531   queue->queue = new oclgrind::Queue(context->context, out_of_order);
5532   queue->dispatch = m_dispatchTable;
5533   queue->properties = props;
5534   queue->context = context;
5535   queue->refCount = 1;
5536   if (properties)
5537   {
5538     queue->properties_array.assign(properties, properties + numProperties);
5539   }
5540 
5541   clRetainContext(context);
5542 
5543   SetError(context, CL_SUCCESS);
5544   return queue;
5545 }
5546 
5547 CL_API_ENTRY cl_mem CL_API_CALL
clCreatePipe(cl_context context,cl_mem_flags flags,cl_uint pipe_packet_size,cl_uint pipe_max_packets,const cl_pipe_properties * properties,cl_int * errcode_ret)5548 clCreatePipe(cl_context context, cl_mem_flags flags, cl_uint pipe_packet_size,
5549              cl_uint pipe_max_packets, const cl_pipe_properties* properties,
5550              cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_0
5551 {
5552   REGISTER_API;
5553 
5554   SetErrorInfo(context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.0 API");
5555   return NULL;
5556 }
5557 
clGetPipeInfo(cl_mem pipe,cl_pipe_info param_name,size_t param_value_size,void * param_value,size_t * param_value_size_ret)5558 CL_API_ENTRY cl_int CL_API_CALL clGetPipeInfo(
5559   cl_mem pipe, cl_pipe_info param_name, size_t param_value_size,
5560   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_0
5561 {
5562   REGISTER_API;
5563 
5564   ReturnErrorInfo(NULL, CL_INVALID_MEM_OBJECT, "Pipes are not supported");
5565 }
5566 
5567 CL_API_ENTRY void* CL_API_CALL
clSVMAlloc(cl_context context,cl_svm_mem_flags flags,size_t size,cl_uint alignment)5568 clSVMAlloc(cl_context context, cl_svm_mem_flags flags, size_t size,
5569            cl_uint alignment) CL_API_SUFFIX__VERSION_2_0
5570 {
5571   REGISTER_API;
5572 
5573   notifyAPIError(context, CL_INVALID_OPERATION, __func__,
5574                  "Unimplemented OpenCL 2.0 API");
5575   return NULL;
5576 }
5577 
clSVMFree(cl_context context,void * svm_pointer)5578 CL_API_ENTRY void CL_API_CALL clSVMFree(cl_context context, void* svm_pointer)
5579   CL_API_SUFFIX__VERSION_2_0
5580 {
5581   REGISTER_API;
5582 
5583   notifyAPIError(context, CL_INVALID_OPERATION, __func__,
5584                  "Unimplemented OpenCL 2.0 API");
5585 }
5586 
clEnqueueSVMFree(cl_command_queue command_queue,cl_uint num_svm_pointers,void * svm_pointers[],void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,cl_uint num_svm_pointers,void * svm_pointers[],void * user_data),void * user_data,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5587 CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFree(
5588   cl_command_queue command_queue, cl_uint num_svm_pointers,
5589   void* svm_pointers[],
5590   void(CL_CALLBACK* pfn_free_func)(cl_command_queue queue,
5591                                    cl_uint num_svm_pointers,
5592                                    void* svm_pointers[], void* user_data),
5593   void* user_data, cl_uint num_events_in_wait_list,
5594   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_2_0
5595 {
5596   REGISTER_API;
5597 
5598   ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
5599                   "Unimplemented OpenCL 2.0 API");
5600 }
5601 
clEnqueueSVMMemcpy(cl_command_queue command_queue,cl_bool blocking_copy,void * dst_ptr,const void * src_ptr,size_t size,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5602 CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpy(
5603   cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr,
5604   const void* src_ptr, size_t size, cl_uint num_events_in_wait_list,
5605   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_2_0
5606 {
5607   REGISTER_API;
5608 
5609   ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
5610                   "Unimplemented OpenCL 2.0 API");
5611 }
5612 
clEnqueueSVMMemFill(cl_command_queue command_queue,void * svm_ptr,const void * pattern,size_t pattern_size,size_t size,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5613 CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFill(
5614   cl_command_queue command_queue, void* svm_ptr, const void* pattern,
5615   size_t pattern_size, size_t size, cl_uint num_events_in_wait_list,
5616   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_2_0
5617 {
5618   REGISTER_API;
5619 
5620   ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
5621                   "Unimplemented OpenCL 2.0 API");
5622 }
5623 
clEnqueueSVMMap(cl_command_queue command_queue,cl_bool blocking_map,cl_map_flags flags,void * svm_ptr,size_t size,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5624 CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMap(
5625   cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags flags,
5626   void* svm_ptr, size_t size, cl_uint num_events_in_wait_list,
5627   const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_2_0
5628 {
5629   REGISTER_API;
5630 
5631   ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
5632                   "Unimplemented OpenCL 2.0 API");
5633 }
5634 
clEnqueueSVMUnmap(cl_command_queue command_queue,void * svm_ptr,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5635 CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMUnmap(
5636   cl_command_queue command_queue, void* svm_ptr,
5637   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
5638   cl_event* event) CL_API_SUFFIX__VERSION_2_0
5639 {
5640   REGISTER_API;
5641 
5642   ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
5643                   "Unimplemented OpenCL 2.0 API");
5644 }
5645 
clCreateSamplerWithProperties(cl_context context,const cl_sampler_properties * sampler_properties,cl_int * errcode_ret)5646 CL_API_ENTRY cl_sampler CL_API_CALL clCreateSamplerWithProperties(
5647   cl_context context, const cl_sampler_properties* sampler_properties,
5648   cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_0
5649 {
5650   REGISTER_API;
5651 
5652   // Check parameters
5653   if (!context)
5654   {
5655     SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
5656     return NULL;
5657   }
5658 
5659   cl_bool normalized_coords = CL_TRUE;
5660   cl_addressing_mode addressing_mode = CL_ADDRESS_CLAMP;
5661   cl_filter_mode filter_mode = CL_FILTER_NEAREST;
5662 
5663   // Parse properties
5664   unsigned i = 0;
5665   while (sampler_properties && sampler_properties[i])
5666   {
5667     switch (sampler_properties[i++])
5668     {
5669     case CL_SAMPLER_NORMALIZED_COORDS:
5670       normalized_coords = sampler_properties[i];
5671       break;
5672     case CL_SAMPLER_ADDRESSING_MODE:
5673       addressing_mode = sampler_properties[i];
5674       break;
5675     case CL_SAMPLER_FILTER_MODE:
5676       filter_mode = sampler_properties[i];
5677       break;
5678     default:
5679       SetErrorInfo(context, CL_INVALID_VALUE, sampler_properties);
5680       return NULL;
5681     }
5682     i++;
5683   }
5684   unsigned numProperties = i + 1;
5685 
5686   // Create sampler bitfield
5687   uint32_t bitfield = 0;
5688 
5689   if (normalized_coords)
5690   {
5691     bitfield |= CLK_NORMALIZED_COORDS_TRUE;
5692   }
5693 
5694   switch (addressing_mode)
5695   {
5696   case CL_ADDRESS_NONE:
5697     break;
5698   case CL_ADDRESS_CLAMP_TO_EDGE:
5699     bitfield |= CLK_ADDRESS_CLAMP_TO_EDGE;
5700     break;
5701   case CL_ADDRESS_CLAMP:
5702     bitfield |= CLK_ADDRESS_CLAMP;
5703     break;
5704   case CL_ADDRESS_REPEAT:
5705     bitfield |= CLK_ADDRESS_REPEAT;
5706     break;
5707   case CL_ADDRESS_MIRRORED_REPEAT:
5708     bitfield |= CLK_ADDRESS_MIRRORED_REPEAT;
5709     break;
5710   default:
5711     SetErrorArg(context, CL_INVALID_VALUE, sampler_properties);
5712     return NULL;
5713   }
5714 
5715   switch (filter_mode)
5716   {
5717   case CL_FILTER_NEAREST:
5718     bitfield |= CLK_FILTER_NEAREST;
5719     break;
5720   case CL_FILTER_LINEAR:
5721     bitfield |= CLK_FILTER_LINEAR;
5722     break;
5723   default:
5724     SetErrorArg(context, CL_INVALID_VALUE, sampler_properties);
5725     return NULL;
5726   }
5727 
5728   // Create sampler
5729   cl_sampler sampler = new _cl_sampler;
5730   sampler->dispatch = m_dispatchTable;
5731   sampler->context = context;
5732   sampler->normCoords = normalized_coords;
5733   sampler->addressMode = addressing_mode;
5734   sampler->filterMode = filter_mode;
5735   sampler->sampler = bitfield;
5736   if (sampler_properties)
5737   {
5738     sampler->properties.assign(sampler_properties,
5739                                sampler_properties + numProperties);
5740   }
5741 
5742   SetError(context, CL_SUCCESS);
5743   return sampler;
5744 }
5745 
5746 CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArgSVMPointer(cl_kernel kernel,cl_uint arg_index,const void * arg_value)5747 clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint arg_index,
5748                          const void* arg_value) CL_API_SUFFIX__VERSION_2_0
5749 {
5750   REGISTER_API;
5751 
5752   ReturnErrorInfo(kernel->program->context, CL_INVALID_OPERATION,
5753                   "Unimplemented OpenCL 2.0 API");
5754 }
5755 
clSetKernelExecInfo(cl_kernel kernel,cl_kernel_exec_info param_name,size_t param_value_size,const void * param_value)5756 CL_API_ENTRY cl_int CL_API_CALL clSetKernelExecInfo(
5757   cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size,
5758   const void* param_value) CL_API_SUFFIX__VERSION_2_0
5759 {
5760   REGISTER_API;
5761 
5762   ReturnErrorInfo(kernel->program->context, CL_INVALID_OPERATION,
5763                   "Unimplemented OpenCL 2.0 API");
5764 }
5765 
clCloneKernel(cl_kernel source_kernel,cl_int * errcode_ret)5766 CL_API_ENTRY cl_kernel CL_API_CALL clCloneKernel(
5767   cl_kernel source_kernel, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1
5768 {
5769   REGISTER_API;
5770 
5771   if (!source_kernel)
5772   {
5773     SetErrorArg(nullptr, CL_INVALID_KERNEL, source_kernel);
5774     return nullptr;
5775   }
5776 
5777   // Create kernel object
5778   cl_kernel kernel = new _cl_kernel;
5779   kernel->dispatch = m_dispatchTable;
5780   kernel->kernel = new oclgrind::Kernel(*source_kernel->kernel);
5781   kernel->program = source_kernel->program;
5782   kernel->memArgs = source_kernel->memArgs;
5783   for (auto src_img : source_kernel->imageArgs)
5784   {
5785     oclgrind::Image* image = new oclgrind::Image;
5786     image->address = src_img->address;
5787     image->format = src_img->format;
5788     image->desc = src_img->desc;
5789     kernel->imageArgs.push_back(image);
5790   }
5791   kernel->refCount = 1;
5792 
5793   clRetainProgram(kernel->program);
5794 
5795   SetError(nullptr, CL_SUCCESS);
5796   return kernel;
5797 }
5798 
5799 CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithIL(cl_context context,const void * il,size_t length,cl_int * errcode_ret)5800 clCreateProgramWithIL(cl_context context, const void* il, size_t length,
5801                       cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1
5802 {
5803   REGISTER_API;
5804 
5805   SetErrorInfo(context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.1 API");
5806   return nullptr;
5807 }
5808 
clEnqueueSVMMigrateMem(cl_command_queue command_queue,cl_uint num_svm_pointers,const void ** svm_pointers,const size_t * sizes,cl_mem_migration_flags flags,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)5809 CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMigrateMem(
5810   cl_command_queue command_queue, cl_uint num_svm_pointers,
5811   const void** svm_pointers, const size_t* sizes, cl_mem_migration_flags flags,
5812   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
5813   cl_event* event) CL_API_SUFFIX__VERSION_2_1
5814 {
5815   REGISTER_API;
5816 
5817   ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
5818                   "Unimplemented OpenCL 2.1 API");
5819 }
5820 
5821 CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceAndHostTimer(cl_device_id device,cl_ulong * device_timestamp,cl_ulong * host_timestamp)5822 clGetDeviceAndHostTimer(cl_device_id device, cl_ulong* device_timestamp,
5823                         cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1
5824 {
5825   REGISTER_API;
5826 
5827   ReturnErrorInfo(nullptr, CL_INVALID_OPERATION,
5828                   "Unimplemented OpenCL 2.1 API");
5829 }
5830 
clGetHostTimer(cl_device_id device,cl_ulong * host_timestamp)5831 CL_API_ENTRY cl_int CL_API_CALL clGetHostTimer(
5832   cl_device_id device, cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1
5833 {
5834   REGISTER_API;
5835 
5836   ReturnErrorInfo(nullptr, CL_INVALID_OPERATION,
5837                   "Unimplemented OpenCL 2.1 API");
5838 }
5839 
clGetKernelSubGroupInfo(cl_kernel kernel,cl_device_id device,cl_kernel_sub_group_info param_name,size_t input_value_size,const void * input_value,size_t param_value_size,void * param_value,size_t * param_value_size_ret)5840 CL_API_ENTRY cl_int CL_API_CALL clGetKernelSubGroupInfo(
5841   cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name,
5842   size_t input_value_size, const void* input_value, size_t param_value_size,
5843   void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1
5844 {
5845   REGISTER_API;
5846 
5847   ReturnErrorInfo(kernel->program->context, CL_INVALID_OPERATION,
5848                   "Unimplemented OpenCL 2.1 API");
5849 }
5850 
clSetDefaultDeviceCommandQueue(cl_context context,cl_device_id device,cl_command_queue command_queue)5851 CL_API_ENTRY cl_int CL_API_CALL clSetDefaultDeviceCommandQueue(
5852   cl_context context, cl_device_id device,
5853   cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1
5854 {
5855   REGISTER_API;
5856 
5857   ReturnErrorInfo(context, CL_INVALID_OPERATION,
5858                   "Unimplemented OpenCL 2.1 API");
5859 }
5860 
5861 CL_API_ENTRY CL_EXT_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL
clSetProgramReleaseCallback(cl_program program,void (CL_CALLBACK * pfn_notify)(cl_program program,void * user_data),void * user_data)5862 clSetProgramReleaseCallback(
5863   cl_program program,
5864   void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data),
5865   void* user_data) CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED
5866 {
5867   REGISTER_API;
5868 
5869   ReturnErrorInfo(program->context, CL_INVALID_OPERATION,
5870                   "Unimplemented OpenCL 2.2 API");
5871 }
5872 
clSetProgramSpecializationConstant(cl_program program,cl_uint spec_id,size_t spec_size,const void * spec_value)5873 CL_API_ENTRY cl_int CL_API_CALL clSetProgramSpecializationConstant(
5874   cl_program program, cl_uint spec_id, size_t spec_size,
5875   const void* spec_value) CL_API_SUFFIX__VERSION_2_2
5876 {
5877   REGISTER_API;
5878 
5879   ReturnErrorInfo(program->context, CL_INVALID_OPERATION,
5880                   "Unimplemented OpenCL 2.2 API");
5881 }
5882 
clSetContextDestructorCallback(cl_context context,void (CL_CALLBACK * pfn_notify)(cl_context context,void * user_data),void * user_data)5883 CL_API_ENTRY cl_int CL_API_CALL clSetContextDestructorCallback(
5884   cl_context context,
5885   void(CL_CALLBACK* pfn_notify)(cl_context context, void* user_data),
5886   void* user_data) CL_API_SUFFIX__VERSION_3_0
5887 {
5888   REGISTER_API;
5889 
5890   // Check parameters
5891   if (!context)
5892   {
5893     ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context);
5894   }
5895   if (!pfn_notify)
5896   {
5897     ReturnErrorArg(context, CL_INVALID_VALUE, pfn_notify);
5898   }
5899 
5900   context->callbacks.push(make_pair(pfn_notify, user_data));
5901 
5902   return CL_SUCCESS;
5903 }
5904 
5905 ////////////////////
5906 // Dispatch Table //
5907 ////////////////////
5908 
5909 #define _NULL_ NULL
5910 #define DISPATCH_TABLE_ENTRY(FUNCTION) (void*)(FUNCTION)
5911 void* m_dispatchTable[] = {
5912   DISPATCH_TABLE_ENTRY(clGetPlatformIDs),
5913   DISPATCH_TABLE_ENTRY(clGetPlatformInfo),
5914   DISPATCH_TABLE_ENTRY(clGetDeviceIDs),
5915   DISPATCH_TABLE_ENTRY(clGetDeviceInfo),
5916   DISPATCH_TABLE_ENTRY(clCreateContext),
5917   DISPATCH_TABLE_ENTRY(clCreateContextFromType),
5918   DISPATCH_TABLE_ENTRY(clRetainContext),
5919   DISPATCH_TABLE_ENTRY(clReleaseContext),
5920   DISPATCH_TABLE_ENTRY(clGetContextInfo),
5921   DISPATCH_TABLE_ENTRY(clCreateCommandQueue),
5922   DISPATCH_TABLE_ENTRY(clRetainCommandQueue),
5923   DISPATCH_TABLE_ENTRY(clReleaseCommandQueue),
5924   DISPATCH_TABLE_ENTRY(clGetCommandQueueInfo),
5925   DISPATCH_TABLE_ENTRY(clSetCommandQueueProperty),
5926   DISPATCH_TABLE_ENTRY(clCreateBuffer),
5927   DISPATCH_TABLE_ENTRY(clCreateImage2D),
5928   DISPATCH_TABLE_ENTRY(clCreateImage3D),
5929   DISPATCH_TABLE_ENTRY(clRetainMemObject),
5930   DISPATCH_TABLE_ENTRY(clReleaseMemObject),
5931   DISPATCH_TABLE_ENTRY(clGetSupportedImageFormats),
5932   DISPATCH_TABLE_ENTRY(clGetMemObjectInfo),
5933   DISPATCH_TABLE_ENTRY(clGetImageInfo),
5934   DISPATCH_TABLE_ENTRY(clCreateSampler),
5935   DISPATCH_TABLE_ENTRY(clRetainSampler),
5936   DISPATCH_TABLE_ENTRY(clReleaseSampler),
5937   DISPATCH_TABLE_ENTRY(clGetSamplerInfo),
5938   DISPATCH_TABLE_ENTRY(clCreateProgramWithSource),
5939   DISPATCH_TABLE_ENTRY(clCreateProgramWithBinary),
5940   DISPATCH_TABLE_ENTRY(clRetainProgram),
5941   DISPATCH_TABLE_ENTRY(clReleaseProgram),
5942   DISPATCH_TABLE_ENTRY(clBuildProgram),
5943   DISPATCH_TABLE_ENTRY(clUnloadCompiler),
5944   DISPATCH_TABLE_ENTRY(clGetProgramInfo),
5945   DISPATCH_TABLE_ENTRY(clGetProgramBuildInfo),
5946   DISPATCH_TABLE_ENTRY(clCreateKernel),
5947   DISPATCH_TABLE_ENTRY(clCreateKernelsInProgram),
5948   DISPATCH_TABLE_ENTRY(clRetainKernel),
5949   DISPATCH_TABLE_ENTRY(clReleaseKernel),
5950   DISPATCH_TABLE_ENTRY(clSetKernelArg),
5951   DISPATCH_TABLE_ENTRY(clGetKernelInfo),
5952   DISPATCH_TABLE_ENTRY(clGetKernelWorkGroupInfo),
5953   DISPATCH_TABLE_ENTRY(clWaitForEvents),
5954   DISPATCH_TABLE_ENTRY(clGetEventInfo),
5955   DISPATCH_TABLE_ENTRY(clRetainEvent),
5956   DISPATCH_TABLE_ENTRY(clReleaseEvent),
5957   DISPATCH_TABLE_ENTRY(clGetEventProfilingInfo),
5958   DISPATCH_TABLE_ENTRY(clFlush),
5959   DISPATCH_TABLE_ENTRY(clFinish),
5960   DISPATCH_TABLE_ENTRY(clEnqueueReadBuffer),
5961   DISPATCH_TABLE_ENTRY(clEnqueueWriteBuffer),
5962   DISPATCH_TABLE_ENTRY(clEnqueueCopyBuffer),
5963   DISPATCH_TABLE_ENTRY(clEnqueueReadImage),
5964   DISPATCH_TABLE_ENTRY(clEnqueueWriteImage),
5965   DISPATCH_TABLE_ENTRY(clEnqueueCopyImage),
5966   DISPATCH_TABLE_ENTRY(clEnqueueCopyImageToBuffer),
5967   DISPATCH_TABLE_ENTRY(clEnqueueCopyBufferToImage),
5968   DISPATCH_TABLE_ENTRY(clEnqueueMapBuffer),
5969   DISPATCH_TABLE_ENTRY(clEnqueueMapImage),
5970   DISPATCH_TABLE_ENTRY(clEnqueueUnmapMemObject),
5971   DISPATCH_TABLE_ENTRY(clEnqueueNDRangeKernel),
5972   DISPATCH_TABLE_ENTRY(clEnqueueTask),
5973   DISPATCH_TABLE_ENTRY(clEnqueueNativeKernel),
5974   DISPATCH_TABLE_ENTRY(clEnqueueMarker),
5975   DISPATCH_TABLE_ENTRY(clEnqueueWaitForEvents),
5976   DISPATCH_TABLE_ENTRY(clEnqueueBarrier),
5977   DISPATCH_TABLE_ENTRY(clGetExtensionFunctionAddress),
5978   DISPATCH_TABLE_ENTRY(clCreateFromGLBuffer),
5979   DISPATCH_TABLE_ENTRY(clCreateFromGLTexture2D),
5980   DISPATCH_TABLE_ENTRY(clCreateFromGLTexture3D),
5981   DISPATCH_TABLE_ENTRY(clCreateFromGLRenderbuffer),
5982   DISPATCH_TABLE_ENTRY(clGetGLObjectInfo),
5983   DISPATCH_TABLE_ENTRY(clGetGLTextureInfo),
5984   DISPATCH_TABLE_ENTRY(clEnqueueAcquireGLObjects),
5985   DISPATCH_TABLE_ENTRY(clEnqueueReleaseGLObjects),
5986 
5987   DISPATCH_TABLE_ENTRY(clGetGLContextInfoKHR),
5988 
5989 #if defined(_WIN32)
5990   DISPATCH_TABLE_ENTRY(clGetDeviceIDsFromD3D10KHR),
5991   DISPATCH_TABLE_ENTRY(clCreateFromD3D10BufferKHR),
5992   DISPATCH_TABLE_ENTRY(clCreateFromD3D10Texture2DKHR),
5993   DISPATCH_TABLE_ENTRY(clCreateFromD3D10Texture3DKHR),
5994   DISPATCH_TABLE_ENTRY(clEnqueueAcquireD3D10ObjectsKHR),
5995   DISPATCH_TABLE_ENTRY(clEnqueueReleaseD3D10ObjectsKHR),
5996 #else
5997   DISPATCH_TABLE_ENTRY(NULL),
5998   DISPATCH_TABLE_ENTRY(NULL),
5999   DISPATCH_TABLE_ENTRY(NULL),
6000   DISPATCH_TABLE_ENTRY(NULL),
6001   DISPATCH_TABLE_ENTRY(NULL),
6002   DISPATCH_TABLE_ENTRY(NULL),
6003 #endif
6004 
6005   // OpenCL 1.1
6006   DISPATCH_TABLE_ENTRY(clSetEventCallback),
6007   DISPATCH_TABLE_ENTRY(clCreateSubBuffer),
6008   DISPATCH_TABLE_ENTRY(clSetMemObjectDestructorCallback),
6009   DISPATCH_TABLE_ENTRY(clCreateUserEvent),
6010   DISPATCH_TABLE_ENTRY(clSetUserEventStatus),
6011   DISPATCH_TABLE_ENTRY(clEnqueueReadBufferRect),
6012   DISPATCH_TABLE_ENTRY(clEnqueueWriteBufferRect),
6013   DISPATCH_TABLE_ENTRY(clEnqueueCopyBufferRect),
6014 
6015   DISPATCH_TABLE_ENTRY(NULL), // clCreateSubDevicesEXT
6016   DISPATCH_TABLE_ENTRY(NULL), // clRetainDeviceEXT
6017   DISPATCH_TABLE_ENTRY(NULL), // clReleaseDeviceEXT
6018 
6019   DISPATCH_TABLE_ENTRY(clCreateEventFromGLsyncKHR),
6020 
6021   // OpenCL 1.2
6022   DISPATCH_TABLE_ENTRY(clCreateSubDevices),
6023   DISPATCH_TABLE_ENTRY(clRetainDevice),
6024   DISPATCH_TABLE_ENTRY(clReleaseDevice),
6025   DISPATCH_TABLE_ENTRY(clCreateImage),
6026   DISPATCH_TABLE_ENTRY(clCreateProgramWithBuiltInKernels),
6027   DISPATCH_TABLE_ENTRY(clCompileProgram),
6028   DISPATCH_TABLE_ENTRY(clLinkProgram),
6029   DISPATCH_TABLE_ENTRY(clUnloadPlatformCompiler),
6030   DISPATCH_TABLE_ENTRY(clGetKernelArgInfo),
6031   DISPATCH_TABLE_ENTRY(clEnqueueFillBuffer),
6032   DISPATCH_TABLE_ENTRY(clEnqueueFillImage),
6033   DISPATCH_TABLE_ENTRY(clEnqueueMigrateMemObjects),
6034   DISPATCH_TABLE_ENTRY(clEnqueueMarkerWithWaitList),
6035   DISPATCH_TABLE_ENTRY(clEnqueueBarrierWithWaitList),
6036   DISPATCH_TABLE_ENTRY(clGetExtensionFunctionAddressForPlatform),
6037   DISPATCH_TABLE_ENTRY(clCreateFromGLTexture),
6038 
6039 #if defined(_WIN32)
6040   DISPATCH_TABLE_ENTRY(clGetDeviceIDsFromD3D11KHR),
6041   DISPATCH_TABLE_ENTRY(clCreateFromD3D11BufferKHR),
6042   DISPATCH_TABLE_ENTRY(clCreateFromD3D11Texture2DKHR),
6043   DISPATCH_TABLE_ENTRY(clCreateFromD3D11Texture3DKHR),
6044   DISPATCH_TABLE_ENTRY(clCreateFromDX9MediaSurfaceKHR),
6045   DISPATCH_TABLE_ENTRY(clEnqueueAcquireD3D11ObjectsKHR),
6046   DISPATCH_TABLE_ENTRY(clEnqueueReleaseD3D11ObjectsKHR),
6047   DISPATCH_TABLE_ENTRY(clGetDeviceIDsFromDX9MediaAdapterKHR),
6048   DISPATCH_TABLE_ENTRY(clEnqueueAcquireDX9MediaSurfacesKHR),
6049   DISPATCH_TABLE_ENTRY(clEnqueueReleaseDX9MediaSurfacesKHR),
6050 #else
6051   DISPATCH_TABLE_ENTRY(NULL),
6052   DISPATCH_TABLE_ENTRY(NULL),
6053   DISPATCH_TABLE_ENTRY(NULL),
6054   DISPATCH_TABLE_ENTRY(NULL),
6055   DISPATCH_TABLE_ENTRY(NULL),
6056   DISPATCH_TABLE_ENTRY(NULL),
6057   DISPATCH_TABLE_ENTRY(NULL),
6058   DISPATCH_TABLE_ENTRY(NULL),
6059   DISPATCH_TABLE_ENTRY(NULL),
6060   DISPATCH_TABLE_ENTRY(NULL),
6061 #endif
6062 
6063   // cl_khr_egl_image
6064   DISPATCH_TABLE_ENTRY(NULL),
6065   DISPATCH_TABLE_ENTRY(NULL),
6066   DISPATCH_TABLE_ENTRY(NULL),
6067 
6068   // cl_khr_egl_event
6069   DISPATCH_TABLE_ENTRY(NULL),
6070 
6071   // OpenCL 2.0
6072   DISPATCH_TABLE_ENTRY(clCreateCommandQueueWithProperties),
6073   DISPATCH_TABLE_ENTRY(clCreatePipe),
6074   DISPATCH_TABLE_ENTRY(clGetPipeInfo),
6075   DISPATCH_TABLE_ENTRY(clSVMAlloc),
6076   DISPATCH_TABLE_ENTRY(clSVMFree),
6077   DISPATCH_TABLE_ENTRY(clEnqueueSVMFree),
6078   DISPATCH_TABLE_ENTRY(clEnqueueSVMMemcpy),
6079   DISPATCH_TABLE_ENTRY(clEnqueueSVMMemFill),
6080   DISPATCH_TABLE_ENTRY(clEnqueueSVMMap),
6081   DISPATCH_TABLE_ENTRY(clEnqueueSVMUnmap),
6082   DISPATCH_TABLE_ENTRY(clCreateSamplerWithProperties),
6083   DISPATCH_TABLE_ENTRY(clSetKernelArgSVMPointer),
6084   DISPATCH_TABLE_ENTRY(clSetKernelExecInfo),
6085 
6086   // cl_khr_sub_groups
6087   DISPATCH_TABLE_ENTRY(NULL),
6088 
6089   // OpenCL 2.1
6090   DISPATCH_TABLE_ENTRY(clCloneKernel),
6091   DISPATCH_TABLE_ENTRY(clCreateProgramWithIL),
6092   DISPATCH_TABLE_ENTRY(clEnqueueSVMMigrateMem),
6093   DISPATCH_TABLE_ENTRY(clGetDeviceAndHostTimer),
6094   DISPATCH_TABLE_ENTRY(clGetHostTimer),
6095   DISPATCH_TABLE_ENTRY(clGetKernelSubGroupInfo),
6096   DISPATCH_TABLE_ENTRY(clSetDefaultDeviceCommandQueue),
6097 
6098   // OpenCL 2.2
6099   DISPATCH_TABLE_ENTRY(clSetProgramReleaseCallback),
6100   DISPATCH_TABLE_ENTRY(clSetProgramSpecializationConstant),
6101 
6102   // OpenCL 3.0
6103   DISPATCH_TABLE_ENTRY(clCreateBufferWithProperties),
6104   DISPATCH_TABLE_ENTRY(clCreateImageWithProperties),
6105   DISPATCH_TABLE_ENTRY(clSetContextDestructorCallback),
6106 };
6107 
6108 #if defined(_WIN32) && !defined(OCLGRIND_ICD)
6109 
6110 #include <Psapi.h>
6111 
6112 // Function to replace calls to clGetPlatformIDs with
6113 // the Oclgrind implementation.
6114 //
6115 // This is invoked by oclgrind.exe after this DLL is
6116 // injected into the child process.
6117 //
6118 // Returns true on success, false on failure.
initOclgrind()6119 bool initOclgrind()
6120 {
6121   // Get base address of process
6122   char* base = (char*)GetModuleHandle(NULL);
6123 
6124   // Get pointer to NT headers
6125   PIMAGE_DOS_HEADER dosHeader = (PIMAGE_DOS_HEADER)(base);
6126   PIMAGE_NT_HEADERS ntHeaders = (PIMAGE_NT_HEADERS)(base + dosHeader->e_lfanew);
6127   if (ntHeaders->Signature != IMAGE_NT_SIGNATURE)
6128   {
6129     std::cerr << "[Oclgrind] Invalid NT signature: " << ntHeaders->Signature
6130               << std::endl;
6131     return false;
6132   }
6133 
6134   // Get pointer to import directory
6135   DWORD importOffset =
6136     ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
6137       .VirtualAddress;
6138   PIMAGE_IMPORT_DESCRIPTOR importDesc =
6139     (PIMAGE_IMPORT_DESCRIPTOR)(base + importOffset);
6140 
6141   // Loop over directory entries
6142   while (importDesc->Name)
6143   {
6144     // Look for OpenCL.dll
6145     const char* modname = (const char*)(base + importDesc->Name);
6146     if (!stricmp(modname, "opencl.dll"))
6147     {
6148       // We use the OriginalFirstThunk to match the name,
6149       // and then replace the function pointer in FirstThunk
6150       PIMAGE_THUNK_DATA origThunk =
6151         (PIMAGE_THUNK_DATA)(base + importDesc->OriginalFirstThunk);
6152       PIMAGE_THUNK_DATA firstThunk =
6153         (PIMAGE_THUNK_DATA)(base + importDesc->FirstThunk);
6154 
6155       // Loop over functions
6156       while (origThunk->u1.AddressOfData)
6157       {
6158         // Skip unnamed functions
6159         if (!(origThunk->u1.Ordinal & IMAGE_ORDINAL_FLAG))
6160         {
6161           // Get function name and check for clGetPlatformIDs
6162           PIMAGE_IMPORT_BY_NAME import =
6163             (PIMAGE_IMPORT_BY_NAME)(base + origThunk->u1.AddressOfData);
6164           if (!stricmp((char*)import->Name, "clGetPlatformIDs"))
6165           {
6166             // Make page writable temporarily
6167             MEMORY_BASIC_INFORMATION mbinfo;
6168             VirtualQuery(firstThunk, &mbinfo, sizeof(mbinfo));
6169             if (!VirtualProtect(mbinfo.BaseAddress, mbinfo.RegionSize,
6170                                 PAGE_EXECUTE_READWRITE, &mbinfo.Protect))
6171             {
6172               std::cerr << "[Oclgrind] Failed to make page writeable: "
6173                         << GetLastError() << std::endl;
6174               return false;
6175             }
6176 
6177             // Replace function pointer with our implementation
6178             firstThunk->u1.Function = (ULONG64)clGetPlatformIDs;
6179 
6180             // Restore page protection
6181             DWORD zero = 0;
6182             if (!VirtualProtect(mbinfo.BaseAddress, mbinfo.RegionSize,
6183                                 mbinfo.Protect, &zero))
6184             {
6185               std::cerr << "[Oclgrind] Failed to restore page protection: "
6186                         << GetLastError() << std::endl;
6187               return false;
6188             }
6189 
6190             return true;
6191           }
6192         }
6193 
6194         origThunk++;
6195         firstThunk++;
6196       }
6197     }
6198     importDesc++;
6199   }
6200 
6201   // We didn't find the function, so just warn user
6202   std::cerr << "[Oclgrind] Warning: unable to patch clGetPlatformIDs"
6203             << std::endl;
6204 
6205   return true;
6206 }
6207 
6208 #endif
6209