1 /*
2 Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
3 Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc.
4 
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
10 
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
14 */
15 
16 //Original author: Roman Ponomarev
17 //Mostly Reimplemented by Erwin Coumans
18 
// Debug switches read by b3OpenCLUtils_compileCLProgramFromString:
// when set, the cached program binary is not loaded, and the kernel source is
// re-read from the .cl file even if an embedded source string was supplied.
bool gDebugForceLoadingFromSource = false;
// when set, the cached program binary is not loaded (it is still written after a build).
bool gDebugSkipLoadingBinary = false;
21 
22 #include "Bullet3Common/b3Logging.h"
23 
24 #include <string.h>
25 
26 #ifdef _WIN32
27 #pragma warning(disable : 4996)
28 #endif
29 #include "b3OpenCLUtils.h"
30 //#include "b3OpenCLInclude.h"
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 
35 #define B3_MAX_CL_DEVICES 16  //who needs 16 devices?
36 
37 #ifdef _WIN32
38 #include <windows.h>
39 #endif
40 
41 #include <assert.h>
42 #define b3Assert assert
43 #ifndef _WIN32
44 #include <sys/stat.h>
45 
46 #endif
47 
// Directory that holds the cached program binaries; replaced through
// b3OpenCLUtils_setCachePath.
static const char* sCachedBinaryPath = "cache";

// Vendor string of the OpenCL SDK this file was built against. It is compared
// with CL_PLATFORM_VENDOR to move the matching platform to the front when a
// context is created, and it is what b3OpenCLUtils_getSdkVendorName returns.
#if defined(CL_PLATFORM_MINI_CL)
#define B3_SDK_PLATFORM_VENDOR "MiniCL, SCEA"
#elif defined(CL_PLATFORM_AMD)
#define B3_SDK_PLATFORM_VENDOR "Advanced Micro Devices, Inc."
#elif defined(CL_PLATFORM_NVIDIA)
#define B3_SDK_PLATFORM_VENDOR "NVIDIA Corporation"
#elif defined(CL_PLATFORM_INTEL)
#define B3_SDK_PLATFORM_VENDOR "Intel(R) Corporation"
#elif defined(B3_USE_CLEW)
#define B3_SDK_PLATFORM_VENDOR "clew (OpenCL Extension Wrangler library)"
#else
#define B3_SDK_PLATFORM_VENDOR "Unknown Vendor"
#endif
static const char* spPlatformVendor = B3_SDK_PLATFORM_VENDOR;
#undef B3_SDK_PLATFORM_VENDOR
65 
66 #ifndef CL_PLATFORM_MINI_CL
67 #ifdef _WIN32
68 #ifndef B3_USE_CLEW
69 #include "CL/cl_gl.h"
70 #endif  //B3_USE_CLEW
71 #endif  //_WIN32
72 #endif
73 
MyFatalBreakAPPLE(const char * errstr,const void * private_info,size_t cb,void * user_data)74 void MyFatalBreakAPPLE(const char* errstr,
75 					   const void* private_info,
76 					   size_t cb,
77 					   void* user_data)
78 {
79 	const char* patloc = strstr(errstr, "Warning");
80 	//find out if it is a warning or error, exit if error
81 
82 	if (patloc)
83 	{
84 		b3Warning("Warning: %s\n", errstr);
85 	}
86 	else
87 	{
88 		b3Error("Error: %s\n", errstr);
89 		b3Assert(0);
90 	}
91 }
92 
93 #ifdef B3_USE_CLEW
94 
b3OpenCLUtils_clewInit()95 int b3OpenCLUtils_clewInit()
96 {
97 	int result = -1;
98 
99 #ifdef _WIN32
100 	const char* cl = "OpenCL.dll";
101 #elif defined __APPLE__
102 	const char* cl = "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
103 #else  //presumable Linux? \
104 	   //linux (tested on Ubuntu 12.10 with Catalyst 13.4 beta drivers, not that there is no symbolic link from libOpenCL.so
105 	const char* cl = "libOpenCL.so.1";
106 	result = clewInit(cl);
107 	if (result != CLEW_SUCCESS)
108 	{
109 		cl = "libOpenCL.so";
110 	}
111 	else
112 	{
113 		clewExit();
114 	}
115 #endif
116 	result = clewInit(cl);
117 	if (result != CLEW_SUCCESS)
118 	{
119 		b3Error("clewInit failed with error code %d\n", result);
120 	}
121 	else
122 	{
123 		b3Printf("clewInit succesfull using %s\n", cl);
124 	}
125 	return result;
126 }
127 #endif
128 
b3OpenCLUtils_getNumPlatforms(cl_int * pErrNum)129 int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum)
130 {
131 #ifdef B3_USE_CLEW
132 	b3OpenCLUtils_clewInit();
133 #endif
134 
135 	cl_platform_id pPlatforms[10] = {0};
136 
137 	cl_uint numPlatforms = 0;
138 	cl_int ciErrNum = clGetPlatformIDs(10, pPlatforms, &numPlatforms);
139 	//cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
140 
141 	if (ciErrNum != CL_SUCCESS)
142 	{
143 		if (pErrNum != NULL)
144 			*pErrNum = ciErrNum;
145 	}
146 	return numPlatforms;
147 }
148 
b3OpenCLUtils_getSdkVendorName()149 const char* b3OpenCLUtils_getSdkVendorName()
150 {
151 	return spPlatformVendor;
152 }
153 
b3OpenCLUtils_setCachePath(const char * path)154 void b3OpenCLUtils_setCachePath(const char* path)
155 {
156 	sCachedBinaryPath = path;
157 }
158 
b3OpenCLUtils_getPlatform(int platformIndex0,cl_int * pErrNum)159 cl_platform_id b3OpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
160 {
161 #ifdef B3_USE_CLEW
162 	b3OpenCLUtils_clewInit();
163 #endif
164 
165 	cl_platform_id platform = 0;
166 	unsigned int platformIndex = (unsigned int)platformIndex0;
167 	cl_uint numPlatforms;
168 	cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
169 
170 	if (platformIndex < numPlatforms)
171 	{
172 		cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
173 		ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
174 		if (ciErrNum != CL_SUCCESS)
175 		{
176 			if (pErrNum != NULL)
177 				*pErrNum = ciErrNum;
178 			return platform;
179 		}
180 
181 		platform = platforms[platformIndex];
182 
183 		free(platforms);
184 	}
185 
186 	return platform;
187 }
188 
getPlatformInfo(cl_platform_id platform,b3OpenCLPlatformInfo * platformInfo)189 void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo)
190 {
191 	b3Assert(platform);
192 	cl_int ciErrNum;
193 	ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, B3_MAX_STRING_LENGTH, platformInfo->m_platformVendor, NULL);
194 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
195 	ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_NAME, B3_MAX_STRING_LENGTH, platformInfo->m_platformName, NULL);
196 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
197 	ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, B3_MAX_STRING_LENGTH, platformInfo->m_platformVersion, NULL);
198 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
199 }
200 
b3OpenCLUtils_printPlatformInfo(cl_platform_id platform)201 void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform)
202 {
203 	b3OpenCLPlatformInfo platformInfo;
204 	b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
205 	b3Printf("Platform info:\n");
206 	b3Printf("  CL_PLATFORM_VENDOR: \t\t\t%s\n", platformInfo.m_platformVendor);
207 	b3Printf("  CL_PLATFORM_NAME: \t\t\t%s\n", platformInfo.m_platformName);
208 	b3Printf("  CL_PLATFORM_VERSION: \t\t\t%s\n", platformInfo.m_platformVersion);
209 }
210 
b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform,cl_device_type deviceType,cl_int * pErrNum,void * pGLContext,void * pGLDC,int preferredDeviceIndex,int preferredPlatformIndex)211 cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex)
212 {
213 	cl_context retContext = 0;
214 	cl_int ciErrNum = 0;
215 	cl_uint num_entries;
216 	cl_device_id devices[B3_MAX_CL_DEVICES];
217 	cl_uint num_devices;
218 	cl_context_properties* cprops;
219 
220 	/*
221 	* If we could find our platform, use it. Otherwise pass a NULL and get whatever the
222 	* implementation thinks we should be using.
223 	*/
224 	cl_context_properties cps[7] = {0, 0, 0, 0, 0, 0, 0};
225 	cps[0] = CL_CONTEXT_PLATFORM;
226 	cps[1] = (cl_context_properties)platform;
227 #ifdef _WIN32
228 #ifndef B3_USE_CLEW
229 	if (pGLContext && pGLDC)
230 	{
231 		cps[2] = CL_GL_CONTEXT_KHR;
232 		cps[3] = (cl_context_properties)pGLContext;
233 		cps[4] = CL_WGL_HDC_KHR;
234 		cps[5] = (cl_context_properties)pGLDC;
235 	}
236 #endif  //B3_USE_CLEW
237 #endif  //_WIN32
238 	num_entries = B3_MAX_CL_DEVICES;
239 
240 	num_devices = -1;
241 
242 	ciErrNum = clGetDeviceIDs(
243 		platform,
244 		deviceType,
245 		num_entries,
246 		devices,
247 		&num_devices);
248 
249 	if (ciErrNum < 0)
250 	{
251 		b3Printf("clGetDeviceIDs returned %d\n", ciErrNum);
252 		return 0;
253 	}
254 	cprops = (NULL == platform) ? NULL : cps;
255 
256 	if (!num_devices)
257 		return 0;
258 
259 	if (pGLContext)
260 	{
261 		//search for the GPU that relates to the OpenCL context
262 		unsigned int i;
263 		for (i = 0; i < num_devices; i++)
264 		{
265 			retContext = clCreateContext(cprops, 1, &devices[i], NULL, NULL, &ciErrNum);
266 			if (ciErrNum == CL_SUCCESS)
267 				break;
268 		}
269 	}
270 	else
271 	{
272 		if (preferredDeviceIndex >= 0 && (unsigned int)preferredDeviceIndex < num_devices)
273 		{
274 			//create a context of the preferred device index
275 			retContext = clCreateContext(cprops, 1, &devices[preferredDeviceIndex], NULL, NULL, &ciErrNum);
276 		}
277 		else
278 		{
279 			//create a context of all devices
280 #if defined(__APPLE__)
281 			retContext = clCreateContext(cprops, num_devices, devices, MyFatalBreakAPPLE, NULL, &ciErrNum);
282 #else
283 			b3Printf("numDevices=%d\n", num_devices);
284 
285 			retContext = clCreateContext(cprops, num_devices, devices, NULL, NULL, &ciErrNum);
286 #endif
287 		}
288 	}
289 	if (pErrNum != NULL)
290 	{
291 		*pErrNum = ciErrNum;
292 	};
293 
294 	return retContext;
295 }
296 
b3OpenCLUtils_createContextFromType(cl_device_type deviceType,cl_int * pErrNum,void * pGLContext,void * pGLDC,int preferredDeviceIndex,int preferredPlatformIndex,cl_platform_id * retPlatformId)297 cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId)
298 {
299 #ifdef B3_USE_CLEW
300 	b3OpenCLUtils_clewInit();
301 #endif
302 
303 	cl_uint numPlatforms;
304 	cl_context retContext = 0;
305 	unsigned int i;
306 
307 	cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
308 	if (ciErrNum != CL_SUCCESS)
309 	{
310 		if (pErrNum != NULL) *pErrNum = ciErrNum;
311 		return NULL;
312 	}
313 	if (numPlatforms > 0)
314 	{
315 		cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
316 		ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
317 		if (ciErrNum != CL_SUCCESS)
318 		{
319 			if (pErrNum != NULL)
320 				*pErrNum = ciErrNum;
321 			free(platforms);
322 			return NULL;
323 		}
324 
325 		for (i = 0; i < numPlatforms; ++i)
326 		{
327 			char pbuf[128];
328 			ciErrNum = clGetPlatformInfo(platforms[i],
329 										 CL_PLATFORM_VENDOR,
330 										 sizeof(pbuf),
331 										 pbuf,
332 										 NULL);
333 			if (ciErrNum != CL_SUCCESS)
334 			{
335 				if (pErrNum != NULL) *pErrNum = ciErrNum;
336 				return NULL;
337 			}
338 
339 			if (preferredPlatformIndex >= 0 && i == preferredPlatformIndex)
340 			{
341 				cl_platform_id tmpPlatform = platforms[0];
342 				platforms[0] = platforms[i];
343 				platforms[i] = tmpPlatform;
344 				break;
345 			}
346 			else
347 			{
348 				if (!strcmp(pbuf, spPlatformVendor))
349 				{
350 					cl_platform_id tmpPlatform = platforms[0];
351 					platforms[0] = platforms[i];
352 					platforms[i] = tmpPlatform;
353 				}
354 			}
355 		}
356 
357 		for (i = 0; i < numPlatforms; ++i)
358 		{
359 			cl_platform_id platform = platforms[i];
360 			assert(platform);
361 
362 			retContext = b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLContext, pGLDC, preferredDeviceIndex, preferredPlatformIndex);
363 
364 			if (retContext)
365 			{
366 				//				printf("OpenCL platform details:\n");
367 				b3OpenCLPlatformInfo platformInfo;
368 
369 				b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
370 
371 				if (retPlatformId)
372 					*retPlatformId = platform;
373 
374 				break;
375 			}
376 		}
377 
378 		free(platforms);
379 	}
380 	return retContext;
381 }
382 
383 //////////////////////////////////////////////////////////////////////////////
384 //! Gets the id of the nth device from the context
385 //!
386 //! @return the id or -1 when out of range
387 //! @param cxMainContext         OpenCL context
388 //! @param device_idx            index of the device of interest
389 //////////////////////////////////////////////////////////////////////////////
b3OpenCLUtils_getDevice(cl_context cxMainContext,int deviceIndex)390 cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex)
391 {
392 	assert(cxMainContext);
393 
394 	size_t szParmDataBytes;
395 	cl_device_id* cdDevices;
396 	cl_device_id device;
397 
398 	// get the list of devices associated with context
399 	clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
400 
401 	if (szParmDataBytes / sizeof(cl_device_id) < (unsigned int)deviceIndex)
402 	{
403 		return (cl_device_id)-1;
404 	}
405 
406 	cdDevices = (cl_device_id*)malloc(szParmDataBytes);
407 
408 	clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
409 
410 	device = cdDevices[deviceIndex];
411 	free(cdDevices);
412 
413 	return device;
414 }
415 
b3OpenCLUtils_getNumDevices(cl_context cxMainContext)416 int b3OpenCLUtils_getNumDevices(cl_context cxMainContext)
417 {
418 	size_t szParamDataBytes;
419 	int device_count;
420 	clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes);
421 	device_count = (int)szParamDataBytes / sizeof(cl_device_id);
422 	return device_count;
423 }
424 
getDeviceInfo(cl_device_id device,b3OpenCLDeviceInfo * info)425 void b3OpenCLUtils::getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info)
426 {
427 	// CL_DEVICE_NAME
428 	clGetDeviceInfo(device, CL_DEVICE_NAME, B3_MAX_STRING_LENGTH, &info->m_deviceName, NULL);
429 
430 	// CL_DEVICE_VENDOR
431 	clGetDeviceInfo(device, CL_DEVICE_VENDOR, B3_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL);
432 
433 	// CL_DRIVER_VERSION
434 	clGetDeviceInfo(device, CL_DRIVER_VERSION, B3_MAX_STRING_LENGTH, &info->m_driverVersion, NULL);
435 
436 	// CL_DEVICE_INFO
437 	clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info->m_deviceType, NULL);
438 
439 	// CL_DEVICE_MAX_COMPUTE_UNITS
440 	clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info->m_computeUnits), &info->m_computeUnits, NULL);
441 
442 	// CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
443 	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info->m_workitemDims), &info->m_workitemDims, NULL);
444 
445 	// CL_DEVICE_MAX_WORK_ITEM_SIZES
446 	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info->m_workItemSize), &info->m_workItemSize, NULL);
447 
448 	// CL_DEVICE_MAX_WORK_GROUP_SIZE
449 	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info->m_workgroupSize), &info->m_workgroupSize, NULL);
450 
451 	// CL_DEVICE_MAX_CLOCK_FREQUENCY
452 	clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info->m_clockFrequency), &info->m_clockFrequency, NULL);
453 
454 	// CL_DEVICE_ADDRESS_BITS
455 	clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info->m_addressBits), &info->m_addressBits, NULL);
456 
457 	// CL_DEVICE_MAX_MEM_ALLOC_SIZE
458 	clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info->m_maxMemAllocSize), &info->m_maxMemAllocSize, NULL);
459 
460 	// CL_DEVICE_GLOBAL_MEM_SIZE
461 	clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info->m_globalMemSize), &info->m_globalMemSize, NULL);
462 
463 	// CL_DEVICE_ERROR_CORRECTION_SUPPORT
464 	clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info->m_errorCorrectionSupport), &info->m_errorCorrectionSupport, NULL);
465 
466 	// CL_DEVICE_LOCAL_MEM_TYPE
467 	clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info->m_localMemType), &info->m_localMemType, NULL);
468 
469 	// CL_DEVICE_LOCAL_MEM_SIZE
470 	clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info->m_localMemSize), &info->m_localMemSize, NULL);
471 
472 	// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
473 	clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info->m_constantBufferSize), &info->m_constantBufferSize, NULL);
474 
475 	// CL_DEVICE_QUEUE_PROPERTIES
476 	clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info->m_queueProperties), &info->m_queueProperties, NULL);
477 
478 	// CL_DEVICE_IMAGE_SUPPORT
479 	clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info->m_imageSupport), &info->m_imageSupport, NULL);
480 
481 	// CL_DEVICE_MAX_READ_IMAGE_ARGS
482 	clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info->m_maxReadImageArgs), &info->m_maxReadImageArgs, NULL);
483 
484 	// CL_DEVICE_MAX_WRITE_IMAGE_ARGS
485 	clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info->m_maxWriteImageArgs), &info->m_maxWriteImageArgs, NULL);
486 
487 	// CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
488 	clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info->m_image2dMaxWidth, NULL);
489 	clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info->m_image2dMaxHeight, NULL);
490 	clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info->m_image3dMaxWidth, NULL);
491 	clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info->m_image3dMaxHeight, NULL);
492 	clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info->m_image3dMaxDepth, NULL);
493 
494 	// CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
495 	clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, B3_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL);
496 
497 	// CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
498 	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info->m_vecWidthChar, NULL);
499 	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info->m_vecWidthShort, NULL);
500 	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info->m_vecWidthInt, NULL);
501 	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info->m_vecWidthLong, NULL);
502 	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info->m_vecWidthFloat, NULL);
503 	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info->m_vecWidthDouble, NULL);
504 }
505 
b3OpenCLUtils_printDeviceInfo(cl_device_id device)506 void b3OpenCLUtils_printDeviceInfo(cl_device_id device)
507 {
508 	b3OpenCLDeviceInfo info;
509 	b3OpenCLUtils::getDeviceInfo(device, &info);
510 	b3Printf("Device Info:\n");
511 	b3Printf("  CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
512 	b3Printf("  CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor);
513 	b3Printf("  CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion);
514 
515 	if (info.m_deviceType & CL_DEVICE_TYPE_CPU)
516 		b3Printf("  CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
517 	if (info.m_deviceType & CL_DEVICE_TYPE_GPU)
518 		b3Printf("  CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
519 	if (info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR)
520 		b3Printf("  CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
521 	if (info.m_deviceType & CL_DEVICE_TYPE_DEFAULT)
522 		b3Printf("  CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
523 
524 	b3Printf("  CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits);
525 	b3Printf("  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims);
526 	b3Printf("  CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", info.m_workItemSize[0], info.m_workItemSize[1], info.m_workItemSize[2]);
527 	b3Printf("  CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", info.m_workgroupSize);
528 	b3Printf("  CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency);
529 	b3Printf("  CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits);
530 	b3Printf("  CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize / (1024 * 1024)));
531 	b3Printf("  CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize / (1024 * 1024)));
532 	b3Printf("  CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport == CL_TRUE ? "yes" : "no");
533 	b3Printf("  CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? "local" : "global");
534 	b3Printf("  CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024));
535 	b3Printf("  CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024));
536 	if (info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
537 		b3Printf("  CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
538 	if (info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE)
539 		b3Printf("  CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
540 
541 	b3Printf("  CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport);
542 
543 	b3Printf("  CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs);
544 	b3Printf("  CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs);
545 	b3Printf("\n  CL_DEVICE_IMAGE <dim>");
546 	b3Printf("\t\t\t2D_MAX_WIDTH\t %u\n", info.m_image2dMaxWidth);
547 	b3Printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", info.m_image2dMaxHeight);
548 	b3Printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", info.m_image3dMaxWidth);
549 	b3Printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", info.m_image3dMaxHeight);
550 	b3Printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", info.m_image3dMaxDepth);
551 	if (*info.m_deviceExtensions != 0)
552 	{
553 		b3Printf("\n  CL_DEVICE_EXTENSIONS:%s\n", info.m_deviceExtensions);
554 	}
555 	else
556 	{
557 		b3Printf("  CL_DEVICE_EXTENSIONS: None\n");
558 	}
559 	b3Printf("  CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
560 	b3Printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
561 			 info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong, info.m_vecWidthFloat, info.m_vecWidthDouble);
562 }
563 
// Returns a pointer into name just past the last (non-overlapping, scanned
// left to right) occurrence of pattern, or name itself when pattern does not
// occur. Used to drop the directory part of a path: strip2("a/b/c.cl", "/")
// yields "c.cl".
// An empty pattern strips nothing (it previously made the scan loop forever,
// because every match consumed zero characters).
static const char* strip2(const char* name, const char* pattern)
{
	size_t const patlen = strlen(pattern);
	const char* tail = name;
	const char* hit;

	if (patlen == 0)
	{
		return name;
	}

	// keep jumping behind each occurrence until none is left
	while ((hit = strstr(tail, pattern)) != NULL)
	{
		tail = hit + patlen;
	}
	return tail;
}
577 
b3OpenCLUtils_compileCLProgramFromString(cl_context clContext,cl_device_id device,const char * kernelSourceOrg,cl_int * pErrNum,const char * additionalMacrosArg,const char * clFileNameForCaching,bool disableBinaryCaching)578 cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg, const char* clFileNameForCaching, bool disableBinaryCaching)
579 {
580 	const char* additionalMacros = additionalMacrosArg ? additionalMacrosArg : "";
581 
582 	if (disableBinaryCaching)
583 	{
584 		//kernelSourceOrg = 0;
585 	}
586 
587 	cl_program m_cpProgram = 0;
588 	cl_int status;
589 
590 	char binaryFileName[B3_MAX_STRING_LENGTH];
591 
592 	char deviceName[256];
593 	char driverVersion[256];
594 	const char* strippedName;
595 	int fileUpToDate = 0;
596 #ifdef _WIN32
597 	int binaryFileValid = 0;
598 #endif
599 	if (!disableBinaryCaching && clFileNameForCaching)
600 	{
601 		clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL);
602 		clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
603 
604 		strippedName = strip2(clFileNameForCaching, "\\");
605 		strippedName = strip2(strippedName, "/");
606 
607 #ifdef _MSC_VER
608 		sprintf_s(binaryFileName, B3_MAX_STRING_LENGTH, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
609 #else
610 		sprintf(binaryFileName, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
611 #endif
612 	}
613 	if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary || gDebugForceLoadingFromSource))
614 	{
615 #ifdef _WIN32
616 		char* bla = 0;
617 
618 		//printf("searching for %s\n", binaryFileName);
619 
620 		FILETIME modtimeBinary;
621 		CreateDirectoryA(sCachedBinaryPath, 0);
622 		{
623 			HANDLE binaryFileHandle = CreateFileA(binaryFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
624 			if (binaryFileHandle == INVALID_HANDLE_VALUE)
625 			{
626 				DWORD errorCode;
627 				errorCode = GetLastError();
628 				switch (errorCode)
629 				{
630 					case ERROR_FILE_NOT_FOUND:
631 					{
632 						b3Warning("\nCached file not found %s\n", binaryFileName);
633 						break;
634 					}
635 					case ERROR_PATH_NOT_FOUND:
636 					{
637 						b3Warning("\nCached file path not found %s\n", binaryFileName);
638 						break;
639 					}
640 					default:
641 					{
642 						b3Warning("\nFailed reading cached file with errorCode = %d\n", errorCode);
643 					}
644 				}
645 			}
646 			else
647 			{
648 				if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary) == 0)
649 				{
650 					DWORD errorCode;
651 					errorCode = GetLastError();
652 					b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
653 				}
654 				else
655 				{
656 					binaryFileValid = 1;
657 				}
658 				CloseHandle(binaryFileHandle);
659 			}
660 
661 			if (binaryFileValid)
662 			{
663 				HANDLE srcFileHandle = CreateFileA(clFileNameForCaching, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
664 
665 				if (srcFileHandle == INVALID_HANDLE_VALUE)
666 				{
667 					const char* prefix[] = {"./", "../", "../../", "../../../", "../../../../"};
668 					for (int i = 0; (srcFileHandle == INVALID_HANDLE_VALUE) && i < 5; i++)
669 					{
670 						char relativeFileName[1024];
671 						sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
672 						srcFileHandle = CreateFileA(relativeFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
673 					}
674 				}
675 
676 				if (srcFileHandle != INVALID_HANDLE_VALUE)
677 				{
678 					FILETIME modtimeSrc;
679 					if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc) == 0)
680 					{
681 						DWORD errorCode;
682 						errorCode = GetLastError();
683 						b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
684 					}
685 					if ((modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) || ((modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime) && (modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
686 					{
687 						fileUpToDate = 1;
688 					}
689 					else
690 					{
691 						b3Warning("\nCached binary file out-of-date (%s)\n", binaryFileName);
692 					}
693 					CloseHandle(srcFileHandle);
694 				}
695 				else
696 				{
697 #ifdef _DEBUG
698 					DWORD errorCode;
699 					errorCode = GetLastError();
700 					switch (errorCode)
701 					{
702 						case ERROR_FILE_NOT_FOUND:
703 						{
704 							b3Warning("\nSrc file not found %s\n", clFileNameForCaching);
705 							break;
706 						}
707 						case ERROR_PATH_NOT_FOUND:
708 						{
709 							b3Warning("\nSrc path not found %s\n", clFileNameForCaching);
710 							break;
711 						}
712 						default:
713 						{
714 							b3Warning("\nnSrc file reading errorCode = %d\n", errorCode);
715 						}
716 					}
717 
718 					//we should make sure the src file exists so we can verify the timestamp with binary
719 					//					assert(0);
720 					b3Warning("Warning: cannot find OpenCL kernel %s to verify timestamp of binary cached kernel %s\n", clFileNameForCaching, binaryFileName);
721 					fileUpToDate = true;
722 #else
723 					//if we cannot find the source, assume it is OK in release builds
724 					fileUpToDate = true;
725 #endif
726 				}
727 			}
728 		}
729 
730 #else
731 		fileUpToDate = true;
732 		if (mkdir(sCachedBinaryPath, 0777) == -1)
733 		{
734 		}
735 		else
736 		{
737 			b3Printf("Succesfully created cache directory: %s\n", sCachedBinaryPath);
738 		}
739 #endif  //_WIN32
740 	}
741 
742 	if (fileUpToDate)
743 	{
744 #ifdef _MSC_VER
745 		FILE* file;
746 		if (fopen_s(&file, binaryFileName, "rb") != 0)
747 			file = 0;
748 #else
749 		FILE* file = fopen(binaryFileName, "rb");
750 #endif
751 
752 		if (file)
753 		{
754 			size_t binarySize = 0;
755 			char* binary = 0;
756 
757 			fseek(file, 0L, SEEK_END);
758 			binarySize = ftell(file);
759 			rewind(file);
760 			binary = (char*)malloc(sizeof(char) * binarySize);
761 			int bytesRead;
762 			bytesRead = fread(binary, sizeof(char), binarySize, file);
763 			fclose(file);
764 
765 			m_cpProgram = clCreateProgramWithBinary(clContext, 1, &device, &binarySize, (const unsigned char**)&binary, 0, &status);
766 			b3Assert(status == CL_SUCCESS);
767 			status = clBuildProgram(m_cpProgram, 1, &device, additionalMacros, 0, 0);
768 			b3Assert(status == CL_SUCCESS);
769 
770 			if (status != CL_SUCCESS)
771 			{
772 				char* build_log;
773 				size_t ret_val_size;
774 				clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
775 				build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
776 				clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
777 				build_log[ret_val_size] = '\0';
778 				b3Error("%s\n", build_log);
779 				free(build_log);
780 				b3Assert(0);
781 				m_cpProgram = 0;
782 
783 				b3Warning("clBuildProgram reported failure on cached binary: %s\n", binaryFileName);
784 			}
785 			else
786 			{
787 				b3Printf("clBuildProgram successfully compiled cached binary: %s\n", binaryFileName);
788 			}
789 			free(binary);
790 		}
791 		else
792 		{
793 			b3Warning("Cannot open cached binary: %s\n", binaryFileName);
794 		}
795 	}
796 
797 	if (!m_cpProgram)
798 	{
799 		cl_int localErrNum;
800 		char* compileFlags;
801 		int flagsize;
802 
803 		const char* kernelSource = kernelSourceOrg;
804 
805 		if (!kernelSourceOrg || gDebugForceLoadingFromSource)
806 		{
807 			if (clFileNameForCaching)
808 			{
809 				FILE* file = fopen(clFileNameForCaching, "rb");
810 				//in many cases the relative path is a few levels up the directory hierarchy, so try it
811 				if (!file)
812 				{
813 					const char* prefix[] = {"../", "../../", "../../../", "../../../../"};
814 					for (int i = 0; !file && i < 3; i++)
815 					{
816 						char relativeFileName[1024];
817 						sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
818 						file = fopen(relativeFileName, "rb");
819 					}
820 				}
821 
822 				if (file)
823 				{
824 					char* kernelSrc = 0;
825 					fseek(file, 0L, SEEK_END);
826 					int kernelSize = ftell(file);
827 					rewind(file);
828 					kernelSrc = (char*)malloc(kernelSize + 1);
829 					int readBytes;
830 					readBytes = fread((void*)kernelSrc, 1, kernelSize, file);
831 					kernelSrc[kernelSize] = 0;
832 					fclose(file);
833 					kernelSource = kernelSrc;
834 				}
835 			}
836 		}
837 
838 		size_t program_length = kernelSource ? strlen(kernelSource) : 0;
839 #ifdef MAC  //or __APPLE__?
840 		char* flags = "-cl-mad-enable -DMAC ";
841 #else
842 		const char* flags = "";
843 #endif
844 
845 		m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum);
846 		if (localErrNum != CL_SUCCESS)
847 		{
848 			if (pErrNum)
849 				*pErrNum = localErrNum;
850 			return 0;
851 		}
852 
853 		// Build the program with 'mad' Optimization option
854 
855 		flagsize = sizeof(char) * (strlen(additionalMacros) + strlen(flags) + 5);
856 		compileFlags = (char*)malloc(flagsize);
857 #ifdef _MSC_VER
858 		sprintf_s(compileFlags, flagsize, "%s %s", flags, additionalMacros);
859 #else
860 		sprintf(compileFlags, "%s %s", flags, additionalMacros);
861 #endif
862 		localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL);
863 		if (localErrNum != CL_SUCCESS)
864 		{
865 			char* build_log;
866 			size_t ret_val_size;
867 			clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
868 			build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
869 			clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
870 
871 			// to be carefully, terminate with \0
872 			// there's no information in the reference whether the string is 0 terminated or not
873 			build_log[ret_val_size] = '\0';
874 
875 			b3Error("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
876 			free(build_log);
877 			if (pErrNum)
878 				*pErrNum = localErrNum;
879 			return 0;
880 		}
881 
882 		if (!disableBinaryCaching && clFileNameForCaching)
883 		{  //	write to binary
884 
885 			cl_uint numAssociatedDevices;
886 			status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0);
887 			b3Assert(status == CL_SUCCESS);
888 			if (numAssociatedDevices == 1)
889 			{
890 				size_t binarySize;
891 				char* binary;
892 
893 				status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0);
894 				b3Assert(status == CL_SUCCESS);
895 
896 				binary = (char*)malloc(sizeof(char) * binarySize);
897 
898 				status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0);
899 				b3Assert(status == CL_SUCCESS);
900 
901 				{
902 					FILE* file = 0;
903 #ifdef _MSC_VER
904 					if (fopen_s(&file, binaryFileName, "wb") != 0)
905 						file = 0;
906 #else
907 					file = fopen(binaryFileName, "wb");
908 #endif
909 					if (file)
910 					{
911 						fwrite(binary, sizeof(char), binarySize, file);
912 						fclose(file);
913 					}
914 					else
915 					{
916 						b3Warning("cannot write file %s\n", binaryFileName);
917 					}
918 				}
919 
920 				free(binary);
921 			}
922 		}
923 
924 		free(compileFlags);
925 	}
926 	return m_cpProgram;
927 }
928 
b3OpenCLUtils_compileCLKernelFromString(cl_context clContext,cl_device_id device,const char * kernelSource,const char * kernelName,cl_int * pErrNum,cl_program prog,const char * additionalMacros)929 cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros)
930 {
931 	cl_kernel kernel;
932 	cl_int localErrNum;
933 
934 	cl_program m_cpProgram = prog;
935 
936 	b3Printf("compiling kernel %s ", kernelName);
937 
938 	if (!m_cpProgram)
939 	{
940 		m_cpProgram = b3OpenCLUtils_compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros, 0, false);
941 	}
942 
943 	// Create the kernel
944 	kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum);
945 	if (localErrNum != CL_SUCCESS)
946 	{
947 		b3Error("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName);
948 		assert(0);
949 		if (pErrNum)
950 			*pErrNum = localErrNum;
951 		return 0;
952 	}
953 
954 	if (!prog && m_cpProgram)
955 	{
956 		clReleaseProgram(m_cpProgram);
957 	}
958 	b3Printf("ready. \n");
959 
960 	if (pErrNum)
961 		*pErrNum = CL_SUCCESS;
962 	return kernel;
963 }
964