1 /*
2 Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
3 Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc.
4
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
10
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
14 */
15
16 //Original author: Roman Ponomarev
17 //Mostly Reimplemented by Erwin Coumans
18
// Debug switch read by b3OpenCLUtils_compileCLProgramFromString: when true, the cached
// binary is not loaded and the kernel text is re-read from the clFileNameForCaching file
// (if it can be opened) even when a source string was supplied.
bool gDebugForceLoadingFromSource = false;
// Debug switch read by b3OpenCLUtils_compileCLProgramFromString: when true, the lookup
// of a cached binary is skipped, so the program is always built from source.
bool gDebugSkipLoadingBinary = false;
21
22 #include "Bullet3Common/b3Logging.h"
23
24 #include <string.h>
25
26 #ifdef _WIN32
27 #pragma warning(disable : 4996)
28 #endif
29 #include "b3OpenCLUtils.h"
30 //#include "b3OpenCLInclude.h"
31
32 #include <stdio.h>
33 #include <stdlib.h>
34
35 #define B3_MAX_CL_DEVICES 16 //who needs 16 devices?
36
37 #ifdef _WIN32
38 #include <windows.h>
39 #endif
40
41 #include <assert.h>
42 #define b3Assert assert
43 #ifndef _WIN32
44 #include <sys/stat.h>
45
46 #endif
47
// Directory in which compiled kernel binaries are cached ("<dir>/<file>.<device>.<driver>.bin").
// Can be replaced at run time through b3OpenCLUtils_setCachePath.
static const char* sCachedBinaryPath = "cache";

//Set the preferred platform vendor using the OpenCL SDK
// The string is chosen at compile time from the CL_PLATFORM_* / B3_USE_CLEW macros.
// b3OpenCLUtils_createContextFromType compares it with each platform's CL_PLATFORM_VENDOR
// and moves a matching platform to the front of its candidate list.
static const char* spPlatformVendor =
#if defined(CL_PLATFORM_MINI_CL)
	"MiniCL, SCEA";
#elif defined(CL_PLATFORM_AMD)
	"Advanced Micro Devices, Inc.";
#elif defined(CL_PLATFORM_NVIDIA)
	"NVIDIA Corporation";
#elif defined(CL_PLATFORM_INTEL)
	"Intel(R) Corporation";
#elif defined(B3_USE_CLEW)
	"clew (OpenCL Extension Wrangler library)";
#else
	"Unknown Vendor";
#endif
65
66 #ifndef CL_PLATFORM_MINI_CL
67 #ifdef _WIN32
68 #ifndef B3_USE_CLEW
69 #include "CL/cl_gl.h"
70 #endif //B3_USE_CLEW
71 #endif //_WIN32
72 #endif
73
MyFatalBreakAPPLE(const char * errstr,const void * private_info,size_t cb,void * user_data)74 void MyFatalBreakAPPLE(const char* errstr,
75 const void* private_info,
76 size_t cb,
77 void* user_data)
78 {
79 const char* patloc = strstr(errstr, "Warning");
80 //find out if it is a warning or error, exit if error
81
82 if (patloc)
83 {
84 b3Warning("Warning: %s\n", errstr);
85 }
86 else
87 {
88 b3Error("Error: %s\n", errstr);
89 b3Assert(0);
90 }
91 }
92
93 #ifdef B3_USE_CLEW
94
b3OpenCLUtils_clewInit()95 int b3OpenCLUtils_clewInit()
96 {
97 int result = -1;
98
99 #ifdef _WIN32
100 const char* cl = "OpenCL.dll";
101 #elif defined __APPLE__
102 const char* cl = "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
103 #else //presumable Linux? \
104 //linux (tested on Ubuntu 12.10 with Catalyst 13.4 beta drivers, not that there is no symbolic link from libOpenCL.so
105 const char* cl = "libOpenCL.so.1";
106 result = clewInit(cl);
107 if (result != CLEW_SUCCESS)
108 {
109 cl = "libOpenCL.so";
110 }
111 else
112 {
113 clewExit();
114 }
115 #endif
116 result = clewInit(cl);
117 if (result != CLEW_SUCCESS)
118 {
119 b3Error("clewInit failed with error code %d\n", result);
120 }
121 else
122 {
123 b3Printf("clewInit succesfull using %s\n", cl);
124 }
125 return result;
126 }
127 #endif
128
b3OpenCLUtils_getNumPlatforms(cl_int * pErrNum)129 int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum)
130 {
131 #ifdef B3_USE_CLEW
132 b3OpenCLUtils_clewInit();
133 #endif
134
135 cl_platform_id pPlatforms[10] = {0};
136
137 cl_uint numPlatforms = 0;
138 cl_int ciErrNum = clGetPlatformIDs(10, pPlatforms, &numPlatforms);
139 //cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
140
141 if (ciErrNum != CL_SUCCESS)
142 {
143 if (pErrNum != NULL)
144 *pErrNum = ciErrNum;
145 }
146 return numPlatforms;
147 }
148
b3OpenCLUtils_getSdkVendorName()149 const char* b3OpenCLUtils_getSdkVendorName()
150 {
151 return spPlatformVendor;
152 }
153
b3OpenCLUtils_setCachePath(const char * path)154 void b3OpenCLUtils_setCachePath(const char* path)
155 {
156 sCachedBinaryPath = path;
157 }
158
b3OpenCLUtils_getPlatform(int platformIndex0,cl_int * pErrNum)159 cl_platform_id b3OpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
160 {
161 #ifdef B3_USE_CLEW
162 b3OpenCLUtils_clewInit();
163 #endif
164
165 cl_platform_id platform = 0;
166 unsigned int platformIndex = (unsigned int)platformIndex0;
167 cl_uint numPlatforms;
168 cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
169
170 if (platformIndex < numPlatforms)
171 {
172 cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
173 ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
174 if (ciErrNum != CL_SUCCESS)
175 {
176 if (pErrNum != NULL)
177 *pErrNum = ciErrNum;
178 return platform;
179 }
180
181 platform = platforms[platformIndex];
182
183 free(platforms);
184 }
185
186 return platform;
187 }
188
getPlatformInfo(cl_platform_id platform,b3OpenCLPlatformInfo * platformInfo)189 void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo)
190 {
191 b3Assert(platform);
192 cl_int ciErrNum;
193 ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, B3_MAX_STRING_LENGTH, platformInfo->m_platformVendor, NULL);
194 oclCHECKERROR(ciErrNum, CL_SUCCESS);
195 ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_NAME, B3_MAX_STRING_LENGTH, platformInfo->m_platformName, NULL);
196 oclCHECKERROR(ciErrNum, CL_SUCCESS);
197 ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, B3_MAX_STRING_LENGTH, platformInfo->m_platformVersion, NULL);
198 oclCHECKERROR(ciErrNum, CL_SUCCESS);
199 }
200
b3OpenCLUtils_printPlatformInfo(cl_platform_id platform)201 void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform)
202 {
203 b3OpenCLPlatformInfo platformInfo;
204 b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
205 b3Printf("Platform info:\n");
206 b3Printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n", platformInfo.m_platformVendor);
207 b3Printf(" CL_PLATFORM_NAME: \t\t\t%s\n", platformInfo.m_platformName);
208 b3Printf(" CL_PLATFORM_VERSION: \t\t\t%s\n", platformInfo.m_platformVersion);
209 }
210
b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform,cl_device_type deviceType,cl_int * pErrNum,void * pGLContext,void * pGLDC,int preferredDeviceIndex,int preferredPlatformIndex)211 cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex)
212 {
213 cl_context retContext = 0;
214 cl_int ciErrNum = 0;
215 cl_uint num_entries;
216 cl_device_id devices[B3_MAX_CL_DEVICES];
217 cl_uint num_devices;
218 cl_context_properties* cprops;
219
220 /*
221 * If we could find our platform, use it. Otherwise pass a NULL and get whatever the
222 * implementation thinks we should be using.
223 */
224 cl_context_properties cps[7] = {0, 0, 0, 0, 0, 0, 0};
225 cps[0] = CL_CONTEXT_PLATFORM;
226 cps[1] = (cl_context_properties)platform;
227 #ifdef _WIN32
228 #ifndef B3_USE_CLEW
229 if (pGLContext && pGLDC)
230 {
231 cps[2] = CL_GL_CONTEXT_KHR;
232 cps[3] = (cl_context_properties)pGLContext;
233 cps[4] = CL_WGL_HDC_KHR;
234 cps[5] = (cl_context_properties)pGLDC;
235 }
236 #endif //B3_USE_CLEW
237 #endif //_WIN32
238 num_entries = B3_MAX_CL_DEVICES;
239
240 num_devices = -1;
241
242 ciErrNum = clGetDeviceIDs(
243 platform,
244 deviceType,
245 num_entries,
246 devices,
247 &num_devices);
248
249 if (ciErrNum < 0)
250 {
251 b3Printf("clGetDeviceIDs returned %d\n", ciErrNum);
252 return 0;
253 }
254 cprops = (NULL == platform) ? NULL : cps;
255
256 if (!num_devices)
257 return 0;
258
259 if (pGLContext)
260 {
261 //search for the GPU that relates to the OpenCL context
262 unsigned int i;
263 for (i = 0; i < num_devices; i++)
264 {
265 retContext = clCreateContext(cprops, 1, &devices[i], NULL, NULL, &ciErrNum);
266 if (ciErrNum == CL_SUCCESS)
267 break;
268 }
269 }
270 else
271 {
272 if (preferredDeviceIndex >= 0 && (unsigned int)preferredDeviceIndex < num_devices)
273 {
274 //create a context of the preferred device index
275 retContext = clCreateContext(cprops, 1, &devices[preferredDeviceIndex], NULL, NULL, &ciErrNum);
276 }
277 else
278 {
279 //create a context of all devices
280 #if defined(__APPLE__)
281 retContext = clCreateContext(cprops, num_devices, devices, MyFatalBreakAPPLE, NULL, &ciErrNum);
282 #else
283 b3Printf("numDevices=%d\n", num_devices);
284
285 retContext = clCreateContext(cprops, num_devices, devices, NULL, NULL, &ciErrNum);
286 #endif
287 }
288 }
289 if (pErrNum != NULL)
290 {
291 *pErrNum = ciErrNum;
292 };
293
294 return retContext;
295 }
296
b3OpenCLUtils_createContextFromType(cl_device_type deviceType,cl_int * pErrNum,void * pGLContext,void * pGLDC,int preferredDeviceIndex,int preferredPlatformIndex,cl_platform_id * retPlatformId)297 cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId)
298 {
299 #ifdef B3_USE_CLEW
300 b3OpenCLUtils_clewInit();
301 #endif
302
303 cl_uint numPlatforms;
304 cl_context retContext = 0;
305 unsigned int i;
306
307 cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
308 if (ciErrNum != CL_SUCCESS)
309 {
310 if (pErrNum != NULL) *pErrNum = ciErrNum;
311 return NULL;
312 }
313 if (numPlatforms > 0)
314 {
315 cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
316 ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
317 if (ciErrNum != CL_SUCCESS)
318 {
319 if (pErrNum != NULL)
320 *pErrNum = ciErrNum;
321 free(platforms);
322 return NULL;
323 }
324
325 for (i = 0; i < numPlatforms; ++i)
326 {
327 char pbuf[128];
328 ciErrNum = clGetPlatformInfo(platforms[i],
329 CL_PLATFORM_VENDOR,
330 sizeof(pbuf),
331 pbuf,
332 NULL);
333 if (ciErrNum != CL_SUCCESS)
334 {
335 if (pErrNum != NULL) *pErrNum = ciErrNum;
336 return NULL;
337 }
338
339 if (preferredPlatformIndex >= 0 && i == preferredPlatformIndex)
340 {
341 cl_platform_id tmpPlatform = platforms[0];
342 platforms[0] = platforms[i];
343 platforms[i] = tmpPlatform;
344 break;
345 }
346 else
347 {
348 if (!strcmp(pbuf, spPlatformVendor))
349 {
350 cl_platform_id tmpPlatform = platforms[0];
351 platforms[0] = platforms[i];
352 platforms[i] = tmpPlatform;
353 }
354 }
355 }
356
357 for (i = 0; i < numPlatforms; ++i)
358 {
359 cl_platform_id platform = platforms[i];
360 assert(platform);
361
362 retContext = b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLContext, pGLDC, preferredDeviceIndex, preferredPlatformIndex);
363
364 if (retContext)
365 {
366 // printf("OpenCL platform details:\n");
367 b3OpenCLPlatformInfo platformInfo;
368
369 b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
370
371 if (retPlatformId)
372 *retPlatformId = platform;
373
374 break;
375 }
376 }
377
378 free(platforms);
379 }
380 return retContext;
381 }
382
383 //////////////////////////////////////////////////////////////////////////////
384 //! Gets the id of the nth device from the context
385 //!
386 //! @return the id or -1 when out of range
387 //! @param cxMainContext OpenCL context
388 //! @param device_idx index of the device of interest
389 //////////////////////////////////////////////////////////////////////////////
b3OpenCLUtils_getDevice(cl_context cxMainContext,int deviceIndex)390 cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex)
391 {
392 assert(cxMainContext);
393
394 size_t szParmDataBytes;
395 cl_device_id* cdDevices;
396 cl_device_id device;
397
398 // get the list of devices associated with context
399 clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
400
401 if (szParmDataBytes / sizeof(cl_device_id) < (unsigned int)deviceIndex)
402 {
403 return (cl_device_id)-1;
404 }
405
406 cdDevices = (cl_device_id*)malloc(szParmDataBytes);
407
408 clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
409
410 device = cdDevices[deviceIndex];
411 free(cdDevices);
412
413 return device;
414 }
415
b3OpenCLUtils_getNumDevices(cl_context cxMainContext)416 int b3OpenCLUtils_getNumDevices(cl_context cxMainContext)
417 {
418 size_t szParamDataBytes;
419 int device_count;
420 clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes);
421 device_count = (int)szParamDataBytes / sizeof(cl_device_id);
422 return device_count;
423 }
424
getDeviceInfo(cl_device_id device,b3OpenCLDeviceInfo * info)425 void b3OpenCLUtils::getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info)
426 {
427 // CL_DEVICE_NAME
428 clGetDeviceInfo(device, CL_DEVICE_NAME, B3_MAX_STRING_LENGTH, &info->m_deviceName, NULL);
429
430 // CL_DEVICE_VENDOR
431 clGetDeviceInfo(device, CL_DEVICE_VENDOR, B3_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL);
432
433 // CL_DRIVER_VERSION
434 clGetDeviceInfo(device, CL_DRIVER_VERSION, B3_MAX_STRING_LENGTH, &info->m_driverVersion, NULL);
435
436 // CL_DEVICE_INFO
437 clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info->m_deviceType, NULL);
438
439 // CL_DEVICE_MAX_COMPUTE_UNITS
440 clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info->m_computeUnits), &info->m_computeUnits, NULL);
441
442 // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
443 clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info->m_workitemDims), &info->m_workitemDims, NULL);
444
445 // CL_DEVICE_MAX_WORK_ITEM_SIZES
446 clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info->m_workItemSize), &info->m_workItemSize, NULL);
447
448 // CL_DEVICE_MAX_WORK_GROUP_SIZE
449 clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info->m_workgroupSize), &info->m_workgroupSize, NULL);
450
451 // CL_DEVICE_MAX_CLOCK_FREQUENCY
452 clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info->m_clockFrequency), &info->m_clockFrequency, NULL);
453
454 // CL_DEVICE_ADDRESS_BITS
455 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info->m_addressBits), &info->m_addressBits, NULL);
456
457 // CL_DEVICE_MAX_MEM_ALLOC_SIZE
458 clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info->m_maxMemAllocSize), &info->m_maxMemAllocSize, NULL);
459
460 // CL_DEVICE_GLOBAL_MEM_SIZE
461 clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info->m_globalMemSize), &info->m_globalMemSize, NULL);
462
463 // CL_DEVICE_ERROR_CORRECTION_SUPPORT
464 clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info->m_errorCorrectionSupport), &info->m_errorCorrectionSupport, NULL);
465
466 // CL_DEVICE_LOCAL_MEM_TYPE
467 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info->m_localMemType), &info->m_localMemType, NULL);
468
469 // CL_DEVICE_LOCAL_MEM_SIZE
470 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info->m_localMemSize), &info->m_localMemSize, NULL);
471
472 // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
473 clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info->m_constantBufferSize), &info->m_constantBufferSize, NULL);
474
475 // CL_DEVICE_QUEUE_PROPERTIES
476 clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info->m_queueProperties), &info->m_queueProperties, NULL);
477
478 // CL_DEVICE_IMAGE_SUPPORT
479 clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info->m_imageSupport), &info->m_imageSupport, NULL);
480
481 // CL_DEVICE_MAX_READ_IMAGE_ARGS
482 clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info->m_maxReadImageArgs), &info->m_maxReadImageArgs, NULL);
483
484 // CL_DEVICE_MAX_WRITE_IMAGE_ARGS
485 clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info->m_maxWriteImageArgs), &info->m_maxWriteImageArgs, NULL);
486
487 // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
488 clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info->m_image2dMaxWidth, NULL);
489 clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info->m_image2dMaxHeight, NULL);
490 clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info->m_image3dMaxWidth, NULL);
491 clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info->m_image3dMaxHeight, NULL);
492 clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info->m_image3dMaxDepth, NULL);
493
494 // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
495 clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, B3_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL);
496
497 // CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
498 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info->m_vecWidthChar, NULL);
499 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info->m_vecWidthShort, NULL);
500 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info->m_vecWidthInt, NULL);
501 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info->m_vecWidthLong, NULL);
502 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info->m_vecWidthFloat, NULL);
503 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info->m_vecWidthDouble, NULL);
504 }
505
b3OpenCLUtils_printDeviceInfo(cl_device_id device)506 void b3OpenCLUtils_printDeviceInfo(cl_device_id device)
507 {
508 b3OpenCLDeviceInfo info;
509 b3OpenCLUtils::getDeviceInfo(device, &info);
510 b3Printf("Device Info:\n");
511 b3Printf(" CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
512 b3Printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor);
513 b3Printf(" CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion);
514
515 if (info.m_deviceType & CL_DEVICE_TYPE_CPU)
516 b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
517 if (info.m_deviceType & CL_DEVICE_TYPE_GPU)
518 b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
519 if (info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR)
520 b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
521 if (info.m_deviceType & CL_DEVICE_TYPE_DEFAULT)
522 b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
523
524 b3Printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits);
525 b3Printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims);
526 b3Printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", info.m_workItemSize[0], info.m_workItemSize[1], info.m_workItemSize[2]);
527 b3Printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", info.m_workgroupSize);
528 b3Printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency);
529 b3Printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits);
530 b3Printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize / (1024 * 1024)));
531 b3Printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize / (1024 * 1024)));
532 b3Printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport == CL_TRUE ? "yes" : "no");
533 b3Printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? "local" : "global");
534 b3Printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024));
535 b3Printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024));
536 if (info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
537 b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
538 if (info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE)
539 b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
540
541 b3Printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport);
542
543 b3Printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs);
544 b3Printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs);
545 b3Printf("\n CL_DEVICE_IMAGE <dim>");
546 b3Printf("\t\t\t2D_MAX_WIDTH\t %u\n", info.m_image2dMaxWidth);
547 b3Printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", info.m_image2dMaxHeight);
548 b3Printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", info.m_image3dMaxWidth);
549 b3Printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", info.m_image3dMaxHeight);
550 b3Printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", info.m_image3dMaxDepth);
551 if (*info.m_deviceExtensions != 0)
552 {
553 b3Printf("\n CL_DEVICE_EXTENSIONS:%s\n", info.m_deviceExtensions);
554 }
555 else
556 {
557 b3Printf(" CL_DEVICE_EXTENSIONS: None\n");
558 }
559 b3Printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
560 b3Printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
561 info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong, info.m_vecWidthFloat, info.m_vecWidthDouble);
562 }
563
// Returns a pointer into 'name' just past the last occurrence of 'pattern', found by a
// left-to-right scan over non-overlapping matches. When 'pattern' does not occur, or is
// empty, 'name' itself is returned. No memory is allocated; the result aliases 'name'.
static const char* strip2(const char* name, const char* pattern)
{
	size_t const patlen = strlen(pattern);
	const char* oriptr = name;
	const char* patloc;

	// an empty pattern matches at every position without advancing, so it would never terminate
	if (patlen == 0)
	{
		return name;
	}

	// skip past each occurrence; oriptr ends up just after the last one
	while ((patloc = strstr(oriptr, pattern)) != NULL)
	{
		oriptr = patloc + patlen;
	}
	return oriptr;
}
577
b3OpenCLUtils_compileCLProgramFromString(cl_context clContext,cl_device_id device,const char * kernelSourceOrg,cl_int * pErrNum,const char * additionalMacrosArg,const char * clFileNameForCaching,bool disableBinaryCaching)578 cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg, const char* clFileNameForCaching, bool disableBinaryCaching)
579 {
580 const char* additionalMacros = additionalMacrosArg ? additionalMacrosArg : "";
581
582 if (disableBinaryCaching)
583 {
584 //kernelSourceOrg = 0;
585 }
586
587 cl_program m_cpProgram = 0;
588 cl_int status;
589
590 char binaryFileName[B3_MAX_STRING_LENGTH];
591
592 char deviceName[256];
593 char driverVersion[256];
594 const char* strippedName;
595 int fileUpToDate = 0;
596 #ifdef _WIN32
597 int binaryFileValid = 0;
598 #endif
599 if (!disableBinaryCaching && clFileNameForCaching)
600 {
601 clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL);
602 clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
603
604 strippedName = strip2(clFileNameForCaching, "\\");
605 strippedName = strip2(strippedName, "/");
606
607 #ifdef _MSC_VER
608 sprintf_s(binaryFileName, B3_MAX_STRING_LENGTH, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
609 #else
610 sprintf(binaryFileName, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
611 #endif
612 }
613 if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary || gDebugForceLoadingFromSource))
614 {
615 #ifdef _WIN32
616 char* bla = 0;
617
618 //printf("searching for %s\n", binaryFileName);
619
620 FILETIME modtimeBinary;
621 CreateDirectoryA(sCachedBinaryPath, 0);
622 {
623 HANDLE binaryFileHandle = CreateFileA(binaryFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
624 if (binaryFileHandle == INVALID_HANDLE_VALUE)
625 {
626 DWORD errorCode;
627 errorCode = GetLastError();
628 switch (errorCode)
629 {
630 case ERROR_FILE_NOT_FOUND:
631 {
632 b3Warning("\nCached file not found %s\n", binaryFileName);
633 break;
634 }
635 case ERROR_PATH_NOT_FOUND:
636 {
637 b3Warning("\nCached file path not found %s\n", binaryFileName);
638 break;
639 }
640 default:
641 {
642 b3Warning("\nFailed reading cached file with errorCode = %d\n", errorCode);
643 }
644 }
645 }
646 else
647 {
648 if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary) == 0)
649 {
650 DWORD errorCode;
651 errorCode = GetLastError();
652 b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
653 }
654 else
655 {
656 binaryFileValid = 1;
657 }
658 CloseHandle(binaryFileHandle);
659 }
660
661 if (binaryFileValid)
662 {
663 HANDLE srcFileHandle = CreateFileA(clFileNameForCaching, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
664
665 if (srcFileHandle == INVALID_HANDLE_VALUE)
666 {
667 const char* prefix[] = {"./", "../", "../../", "../../../", "../../../../"};
668 for (int i = 0; (srcFileHandle == INVALID_HANDLE_VALUE) && i < 5; i++)
669 {
670 char relativeFileName[1024];
671 sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
672 srcFileHandle = CreateFileA(relativeFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
673 }
674 }
675
676 if (srcFileHandle != INVALID_HANDLE_VALUE)
677 {
678 FILETIME modtimeSrc;
679 if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc) == 0)
680 {
681 DWORD errorCode;
682 errorCode = GetLastError();
683 b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
684 }
685 if ((modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) || ((modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime) && (modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
686 {
687 fileUpToDate = 1;
688 }
689 else
690 {
691 b3Warning("\nCached binary file out-of-date (%s)\n", binaryFileName);
692 }
693 CloseHandle(srcFileHandle);
694 }
695 else
696 {
697 #ifdef _DEBUG
698 DWORD errorCode;
699 errorCode = GetLastError();
700 switch (errorCode)
701 {
702 case ERROR_FILE_NOT_FOUND:
703 {
704 b3Warning("\nSrc file not found %s\n", clFileNameForCaching);
705 break;
706 }
707 case ERROR_PATH_NOT_FOUND:
708 {
709 b3Warning("\nSrc path not found %s\n", clFileNameForCaching);
710 break;
711 }
712 default:
713 {
714 b3Warning("\nnSrc file reading errorCode = %d\n", errorCode);
715 }
716 }
717
718 //we should make sure the src file exists so we can verify the timestamp with binary
719 // assert(0);
720 b3Warning("Warning: cannot find OpenCL kernel %s to verify timestamp of binary cached kernel %s\n", clFileNameForCaching, binaryFileName);
721 fileUpToDate = true;
722 #else
723 //if we cannot find the source, assume it is OK in release builds
724 fileUpToDate = true;
725 #endif
726 }
727 }
728 }
729
730 #else
731 fileUpToDate = true;
732 if (mkdir(sCachedBinaryPath, 0777) == -1)
733 {
734 }
735 else
736 {
737 b3Printf("Succesfully created cache directory: %s\n", sCachedBinaryPath);
738 }
739 #endif //_WIN32
740 }
741
742 if (fileUpToDate)
743 {
744 #ifdef _MSC_VER
745 FILE* file;
746 if (fopen_s(&file, binaryFileName, "rb") != 0)
747 file = 0;
748 #else
749 FILE* file = fopen(binaryFileName, "rb");
750 #endif
751
752 if (file)
753 {
754 size_t binarySize = 0;
755 char* binary = 0;
756
757 fseek(file, 0L, SEEK_END);
758 binarySize = ftell(file);
759 rewind(file);
760 binary = (char*)malloc(sizeof(char) * binarySize);
761 int bytesRead;
762 bytesRead = fread(binary, sizeof(char), binarySize, file);
763 fclose(file);
764
765 m_cpProgram = clCreateProgramWithBinary(clContext, 1, &device, &binarySize, (const unsigned char**)&binary, 0, &status);
766 b3Assert(status == CL_SUCCESS);
767 status = clBuildProgram(m_cpProgram, 1, &device, additionalMacros, 0, 0);
768 b3Assert(status == CL_SUCCESS);
769
770 if (status != CL_SUCCESS)
771 {
772 char* build_log;
773 size_t ret_val_size;
774 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
775 build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
776 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
777 build_log[ret_val_size] = '\0';
778 b3Error("%s\n", build_log);
779 free(build_log);
780 b3Assert(0);
781 m_cpProgram = 0;
782
783 b3Warning("clBuildProgram reported failure on cached binary: %s\n", binaryFileName);
784 }
785 else
786 {
787 b3Printf("clBuildProgram successfully compiled cached binary: %s\n", binaryFileName);
788 }
789 free(binary);
790 }
791 else
792 {
793 b3Warning("Cannot open cached binary: %s\n", binaryFileName);
794 }
795 }
796
797 if (!m_cpProgram)
798 {
799 cl_int localErrNum;
800 char* compileFlags;
801 int flagsize;
802
803 const char* kernelSource = kernelSourceOrg;
804
805 if (!kernelSourceOrg || gDebugForceLoadingFromSource)
806 {
807 if (clFileNameForCaching)
808 {
809 FILE* file = fopen(clFileNameForCaching, "rb");
810 //in many cases the relative path is a few levels up the directory hierarchy, so try it
811 if (!file)
812 {
813 const char* prefix[] = {"../", "../../", "../../../", "../../../../"};
814 for (int i = 0; !file && i < 3; i++)
815 {
816 char relativeFileName[1024];
817 sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
818 file = fopen(relativeFileName, "rb");
819 }
820 }
821
822 if (file)
823 {
824 char* kernelSrc = 0;
825 fseek(file, 0L, SEEK_END);
826 int kernelSize = ftell(file);
827 rewind(file);
828 kernelSrc = (char*)malloc(kernelSize + 1);
829 int readBytes;
830 readBytes = fread((void*)kernelSrc, 1, kernelSize, file);
831 kernelSrc[kernelSize] = 0;
832 fclose(file);
833 kernelSource = kernelSrc;
834 }
835 }
836 }
837
838 size_t program_length = kernelSource ? strlen(kernelSource) : 0;
839 #ifdef MAC //or __APPLE__?
840 char* flags = "-cl-mad-enable -DMAC ";
841 #else
842 const char* flags = "";
843 #endif
844
845 m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum);
846 if (localErrNum != CL_SUCCESS)
847 {
848 if (pErrNum)
849 *pErrNum = localErrNum;
850 return 0;
851 }
852
853 // Build the program with 'mad' Optimization option
854
855 flagsize = sizeof(char) * (strlen(additionalMacros) + strlen(flags) + 5);
856 compileFlags = (char*)malloc(flagsize);
857 #ifdef _MSC_VER
858 sprintf_s(compileFlags, flagsize, "%s %s", flags, additionalMacros);
859 #else
860 sprintf(compileFlags, "%s %s", flags, additionalMacros);
861 #endif
862 localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL);
863 if (localErrNum != CL_SUCCESS)
864 {
865 char* build_log;
866 size_t ret_val_size;
867 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
868 build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
869 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
870
871 // to be carefully, terminate with \0
872 // there's no information in the reference whether the string is 0 terminated or not
873 build_log[ret_val_size] = '\0';
874
875 b3Error("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
876 free(build_log);
877 if (pErrNum)
878 *pErrNum = localErrNum;
879 return 0;
880 }
881
882 if (!disableBinaryCaching && clFileNameForCaching)
883 { // write to binary
884
885 cl_uint numAssociatedDevices;
886 status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0);
887 b3Assert(status == CL_SUCCESS);
888 if (numAssociatedDevices == 1)
889 {
890 size_t binarySize;
891 char* binary;
892
893 status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0);
894 b3Assert(status == CL_SUCCESS);
895
896 binary = (char*)malloc(sizeof(char) * binarySize);
897
898 status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0);
899 b3Assert(status == CL_SUCCESS);
900
901 {
902 FILE* file = 0;
903 #ifdef _MSC_VER
904 if (fopen_s(&file, binaryFileName, "wb") != 0)
905 file = 0;
906 #else
907 file = fopen(binaryFileName, "wb");
908 #endif
909 if (file)
910 {
911 fwrite(binary, sizeof(char), binarySize, file);
912 fclose(file);
913 }
914 else
915 {
916 b3Warning("cannot write file %s\n", binaryFileName);
917 }
918 }
919
920 free(binary);
921 }
922 }
923
924 free(compileFlags);
925 }
926 return m_cpProgram;
927 }
928
b3OpenCLUtils_compileCLKernelFromString(cl_context clContext,cl_device_id device,const char * kernelSource,const char * kernelName,cl_int * pErrNum,cl_program prog,const char * additionalMacros)929 cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros)
930 {
931 cl_kernel kernel;
932 cl_int localErrNum;
933
934 cl_program m_cpProgram = prog;
935
936 b3Printf("compiling kernel %s ", kernelName);
937
938 if (!m_cpProgram)
939 {
940 m_cpProgram = b3OpenCLUtils_compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros, 0, false);
941 }
942
943 // Create the kernel
944 kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum);
945 if (localErrNum != CL_SUCCESS)
946 {
947 b3Error("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName);
948 assert(0);
949 if (pErrNum)
950 *pErrNum = localErrNum;
951 return 0;
952 }
953
954 if (!prog && m_cpProgram)
955 {
956 clReleaseProgram(m_cpProgram);
957 }
958 b3Printf("ready. \n");
959
960 if (pErrNum)
961 *pErrNum = CL_SUCCESS;
962 return kernel;
963 }
964