1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2012 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>.
17
18 // Detection of GPUs using OpenCL
19
20 #define TEST_OTHER_COPROC_LOGIC 0
21
22 #ifdef _WIN32
23 #include "boinc_win.h"
24 #ifdef _MSC_VER
25 #define snprintf _snprintf
26 #endif
27 #else
28 #ifdef __APPLE__
29 // Suppress obsolete warning when building for OS 10.3.9
30 #define DLOPEN_NO_WARN
31 #include <mach-o/dyld.h>
32 #endif
33 #include "config.h"
34 #include <dlfcn.h>
35 #endif
36
37 #include <vector>
38 #include <string>
39
40 using std::vector;
41 using std::string;
42
43 #include "coproc.h"
44 #include "str_replace.h"
45 #include "util.h"
46
47 #include "client_msgs.h"
48 #include "client_state.h"
49 #include "gpu_detect.h"
50
#ifdef _WIN32

// Handle of the OpenCL library; set by LoadLibrary() in
// COPROCS::get_opencl() and released there with FreeLibrary().
HMODULE opencl_lib = NULL;

// Signatures of the OpenCL entry points that are looked up
// with GetProcAddress() rather than linked directly.
typedef cl_int (__stdcall *CL_PLATFORMIDS) (cl_uint, cl_platform_id*, cl_uint*);
typedef cl_int (__stdcall *CL_PLATFORMINFO) (cl_platform_id, cl_platform_info, size_t, void*, size_t*);
typedef cl_int (__stdcall *CL_DEVICEIDS)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*);
typedef cl_int (__stdcall *CL_INFO) (cl_device_id, cl_device_info, size_t, void*, size_t*);

// Pointers to the resolved entry points; NULL until
// COPROCS::get_opencl() has looked them up.
CL_PLATFORMIDS p_clGetPlatformIDs = NULL;
CL_PLATFORMINFO p_clGetPlatformInfo = NULL;
CL_DEVICEIDS p_clGetDeviceIDs = NULL;
CL_INFO p_clGetDeviceInfo = NULL;

#else

// Handle of the OpenCL library; set by dlopen() in
// COPROCS::get_opencl() and released there with dlclose().
void* opencl_lib = NULL;

// Pointers to the OpenCL entry points, filled in with dlsym()
// by COPROCS::get_opencl().
//
// clGetPlatformIDs()
cl_int (*p_clGetPlatformIDs)(
    cl_uint, // num_entries,
    cl_platform_id*, // platforms
    cl_uint * // num_platforms
);
// clGetPlatformInfo()
cl_int (*p_clGetPlatformInfo)(
    cl_platform_id, // platform
    cl_platform_info, // param_name
    size_t, // param_value_size
    void*, // param_value
    size_t* // param_value_size_ret
);
// clGetDeviceIDs()
cl_int (*p_clGetDeviceIDs)(
    cl_platform_id, // platform
    cl_device_type, // device_type
    cl_uint, // num_entries
    cl_device_id*, // devices
    cl_uint* // num_devices
);
// clGetDeviceInfo()
cl_int (*p_clGetDeviceInfo)(
    cl_device_id, // device
    cl_device_info, // param_name
    size_t, // param_value_size
    void*, // param_value
    size_t* // param_value_size_ret
);

#endif
97
// Return true if the vendor string names AMD (formerly ATI).
// The comparison is a case-sensitive substring search for any of
// "ATI", "AMD" or "Advanced Micro Devices, Inc.".
//
static bool is_AMD(char *vendor) {
    const char* const markers[] = {
        "ATI",
        "AMD",
        "Advanced Micro Devices, Inc."
    };
    const size_t num_markers = sizeof(markers) / sizeof(markers[0]);

    for (size_t k = 0; k < num_markers; ++k) {
        if (strstr(vendor, markers[k]) != NULL) {
            return true;
        }
    }
    return false;
}
104
// Return true if the vendor string contains "NVIDIA" (case-sensitive).
//
static bool is_NVIDIA(char* vendor) {
    return strstr(vendor, "NVIDIA") != NULL;
}
109
// Return true if the vendor string contains "intel" in any letter case.
//
static bool is_intel(char* vendor) {
    return strcasestr(vendor, "intel") != NULL;
}
114
115 // If "loose", tolerate small diff
116 //
opencl_compare(OPENCL_DEVICE_PROP & c1,OPENCL_DEVICE_PROP & c2,bool loose)117 static int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
118 if (c1.opencl_device_version_int > c2.opencl_device_version_int) return 1;
119 if (c1.opencl_device_version_int < c2.opencl_device_version_int) return -1;
120 if (loose) {
121 if (c1.global_mem_size > 1.4*c2.global_mem_size) return 1;
122 if (c1.global_mem_size < .7*c2.global_mem_size) return -1;
123 return 0;
124 }
125 if (c1.global_mem_size > c2.global_mem_size) return 1;
126 if (c1.global_mem_size < c2.global_mem_size) return -1;
127 if (c1.peak_flops > c2.peak_flops) return 1;
128 if (c1.peak_flops < c2.peak_flops) return -1;
129 return 0;
130 }
131
132 #ifdef __APPLE__
compare_pci_slots(int NVIDIA_GPU_Index1,int NVIDIA_GPU_Index2)133 static bool compare_pci_slots(int NVIDIA_GPU_Index1, int NVIDIA_GPU_Index2) {
134 if (NVIDIA_GPU_Index1 >= (int)nvidia_gpus.size()) return false; // Should never happen
135 if (NVIDIA_GPU_Index2 >= (int)nvidia_gpus.size()) return false; // Should never happen
136 return (
137 nvidia_gpus[NVIDIA_GPU_Index1].pci_info.bus_id <
138 nvidia_gpus[NVIDIA_GPU_Index2].pci_info.bus_id
139 );
140 }
141 #endif
142
143
144 // OpenCL interfaces are documented here:
145 // http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and
146 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/
147
get_opencl(vector<string> & warnings)148 void COPROCS::get_opencl(
149 vector<string>& warnings
150 ) {
151 cl_int ciErrNum;
152 cl_platform_id platforms[MAX_OPENCL_PLATFORMS];
153 cl_uint num_platforms, platform_index, num_devices, device_index;
154 cl_device_id devices[MAX_COPROC_INSTANCES];
155 char platform_version[256];
156 char platform_vendor[256];
157 char buf[256];
158 OPENCL_DEVICE_PROP prop;
159 int current_CUDA_index;
160 int current_CAL_index;
161 int min_CAL_target;
162 int num_CAL_devices = (int)ati_gpus.size();
163 vector<int>devnums_pci_slot_sort;
164 vector<OPENCL_DEVICE_PROP>::iterator it;
165 int max_other_coprocs = MAX_RSC-1; // coprocs[0] is reserved for CPU
166
167 if (cc_config.no_opencl) {
168 return;
169 }
170
171 #ifdef _WIN32
172 opencl_lib = LoadLibrary("OpenCL.dll");
173 if (!opencl_lib) {
174 warnings.push_back("No OpenCL library found");
175 return;
176 }
177
178 p_clGetPlatformIDs = (CL_PLATFORMIDS)GetProcAddress( opencl_lib, "clGetPlatformIDs" );
179 p_clGetPlatformInfo = (CL_PLATFORMINFO)GetProcAddress( opencl_lib, "clGetPlatformInfo" );
180 p_clGetDeviceIDs = (CL_DEVICEIDS)GetProcAddress( opencl_lib, "clGetDeviceIDs" );
181 p_clGetDeviceInfo = (CL_INFO)GetProcAddress( opencl_lib, "clGetDeviceInfo" );
182 #else
183 #ifdef __APPLE__
184 opencl_lib = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_NOW);
185 #else
186 opencl_lib = dlopen("libOpenCL.so", RTLD_NOW);
187 if (!opencl_lib) {
188 opencl_lib = dlopen("libOpenCL.so.1", RTLD_NOW);
189 }
190 #endif
191 if (!opencl_lib) {
192 sprintf(buf, "OpenCL: %s", dlerror());
193 warnings.push_back(buf);
194 return;
195 }
196 p_clGetPlatformIDs = (cl_int(*)(cl_uint, cl_platform_id*, cl_uint*)) dlsym( opencl_lib, "clGetPlatformIDs" );
197 p_clGetPlatformInfo = (cl_int(*)(cl_platform_id, cl_platform_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetPlatformInfo" );
198 p_clGetDeviceIDs = (cl_int(*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*)) dlsym( opencl_lib, "clGetDeviceIDs" );
199 p_clGetDeviceInfo = (cl_int(*)(cl_device_id, cl_device_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetDeviceInfo" );
200 #endif
201
202 if (!p_clGetPlatformIDs) {
203 warnings.push_back("clGetPlatformIDs() missing from OpenCL library");
204 goto leave;
205 }
206 if (!p_clGetPlatformInfo) {
207 warnings.push_back("clGetPlatformInfo() missing from OpenCL library");
208 goto leave;
209 }
210 if (!p_clGetDeviceIDs) {
211 warnings.push_back("clGetDeviceIDs() missing from OpenCL library");
212 goto leave;
213 }
214 if (!p_clGetDeviceInfo) {
215 warnings.push_back("clGetDeviceInfo() missing from OpenCL library");
216 goto leave;
217 }
218
219 ciErrNum = (*p_clGetPlatformIDs)(MAX_OPENCL_PLATFORMS, platforms, &num_platforms);
220 if ((ciErrNum != CL_SUCCESS) || (num_platforms == 0)) {
221 warnings.push_back("clGetPlatformIDs() failed to return any OpenCL platforms");
222 goto leave;
223 }
224
225 if (nvidia_gpus.size()) {
226 for (int i=0; i<(int)nvidia_gpus.size(); ++i) {
227 devnums_pci_slot_sort.push_back(i);
228 }
229 #ifdef __APPLE__
230 std::stable_sort(
231 devnums_pci_slot_sort.begin(),
232 devnums_pci_slot_sort.end(),
233 compare_pci_slots
234 );
235 #endif
236 }
237
238 for (platform_index=0; platform_index<num_platforms; ++platform_index) {
239 ciErrNum = (*p_clGetPlatformInfo)(
240 platforms[platform_index], CL_PLATFORM_VERSION,
241 sizeof(platform_version), &platform_version, NULL
242 );
243 if (ciErrNum != CL_SUCCESS) {
244 snprintf(buf, sizeof(buf),
245 "Couldn't get PLATFORM_VERSION for platform #%d; error %d",
246 platform_index, ciErrNum
247 );
248 warnings.push_back(buf);
249 continue;
250 }
251
252 ciErrNum = (*p_clGetPlatformInfo)(
253 platforms[platform_index], CL_PLATFORM_VENDOR,
254 sizeof(platform_vendor), &platform_vendor, NULL
255 );
256 if (ciErrNum != CL_SUCCESS) {
257 snprintf(buf, sizeof(buf),
258 "Couldn't get PLATFORM_VENDOR for platform #%d; error %d",
259 platform_index, ciErrNum
260 );
261 warnings.push_back(buf);
262 }
263
264 //////////// CPU //////////////
265
266 ciErrNum = (*p_clGetDeviceIDs)(
267 platforms[platform_index], (CL_DEVICE_TYPE_CPU),
268 MAX_COPROC_INSTANCES, devices, &num_devices
269 );
270
271 if ((ciErrNum != CL_SUCCESS) && (num_devices != 0)) {
272 num_devices = 0; // No devices
273 if (ciErrNum != CL_DEVICE_NOT_FOUND) {
274 snprintf(buf, sizeof(buf),
275 "Couldn't get CPU Device IDs for platform #%d: error %d",
276 platform_index, ciErrNum
277 );
278 warnings.push_back(buf);
279 }
280 }
281
282 for (device_index=0; device_index<num_devices; ++device_index) {
283 memset(&prop, 0, sizeof(prop));
284 prop.device_id = devices[device_index];
285 strlcpy(
286 prop.opencl_platform_version, platform_version,
287 sizeof(prop.opencl_platform_version)
288 );
289
290 ciErrNum = get_opencl_info(prop, device_index, warnings);
291 if (ciErrNum != CL_SUCCESS) continue;
292
293 prop.is_used = COPROC_UNUSED;
294 prop.get_device_version_int();
295
296 OPENCL_CPU_PROP c;
297 strlcpy(c.platform_vendor, platform_vendor, sizeof(c.platform_vendor));
298 c.opencl_prop = prop;
299 cpu_opencls.push_back(c);
300 }
301
302 //////////// GPUs and Accelerators //////////////
303
304 ciErrNum = (*p_clGetDeviceIDs)(
305 platforms[platform_index],
306 (CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR),
307 MAX_COPROC_INSTANCES, devices, &num_devices
308 );
309
310 if (ciErrNum == CL_DEVICE_NOT_FOUND) continue; // No devices
311 if (num_devices == 0) continue; // No devices
312
313 if (ciErrNum != CL_SUCCESS) {
314 snprintf(buf, sizeof(buf),
315 "Couldn't get Device IDs for platform #%d: error %d",
316 platform_index, ciErrNum
317 );
318 warnings.push_back(buf);
319 continue;
320 }
321
322 // Mac OpenCL does not recognize all NVIDIA GPUs returned by CUDA
323 // Fortunately, CUDA and OpenCL return the same GPU model name on
324 // the Mac, so we can use this to match OpenCL devices with CUDA.
325 //
326 current_CUDA_index = 0;
327
328 // ATI/AMD OpenCL does not always recognize all GPUs returned by CAL.
329 // This is complicated for several reasons:
330 // * CAL returns only an enum (CALtargetEnum) for the GPU's family,
331 // not specific model information.
332 // * OpenCL return only the GPU family name
333 // * Which GPUs support OpenCL varies with different versions of the
334 // AMD Catalyst drivers.
335 //
336 // To deal with this, we make some (probably imperfect) assumptions:
337 // * AMD drivers eliminate OpenCL support for older GPU families first.
338 // * Lower values of CALtargetEnum represent older GPU families.
339 // * All ATI/AMD GPUs reported by OpenCL are also reported by CAL (on
340 // systems where CAL is available) though the converse may not be true.
341 //
342 current_CAL_index = 0;
343 min_CAL_target = 0;
344 if (is_AMD(platform_vendor) && (num_CAL_devices > 0)) {
345 while (1) {
346 int numToMatch = 0;
347 for (int i=0; i<num_CAL_devices; ++i) {
348 if ((int)ati_gpus[i].attribs.target >= min_CAL_target) {
349 ++numToMatch;
350 }
351 }
352 if (numToMatch == (int)num_devices) break;
353 if (numToMatch < (int)num_devices) {
354 warnings.push_back(
355 "Could not match ATI OpenCL and CAL GPUs: ignoring CAL."
356 );
357 // If we can't match ATI OpenCL and CAL GPUs, ignore CAL
358 // and keep OpenCL because AMD has deprecated CAL.
359 ati_gpus.clear();
360 ati.have_cal = false;
361 num_CAL_devices = 0;
362 break;
363 }
364 ++min_CAL_target;
365 }
366 }
367
368 for (device_index=0; device_index<num_devices; ++device_index) {
369 memset(&prop, 0, sizeof(prop));
370 prop.device_id = devices[device_index];
371 strlcpy(
372 prop.opencl_platform_version, platform_version,
373 sizeof(prop.opencl_platform_version)
374 );
375
376 //TODO: Should we store the platform(s) for each GPU found?
377 //TODO: Must we check if multiple platforms found the same GPU and merge the records?
378 ciErrNum = get_opencl_info(prop, device_index, warnings);
379 if (ciErrNum != CL_SUCCESS) continue;
380
381 // TODO: Eliminate this, or improve it
382 #if TEST_OTHER_COPROC_LOGIC
383 if (is_NVIDIA(prop.vendor)) {
384 safe_strcpy(prop.vendor, "FAKE VENDOR X");
385 } else if (is_AMD(prop.vendor)) {
386 safe_strcpy(prop.vendor, "FAKE VENDOR Y");
387 } else {
388 safe_strcpy(prop.vendor, "FAKE VENDOR Z");
389 }
390 #endif
391
392 prop.is_used = COPROC_UNUSED;
393 prop.get_device_version_int();
394
395 //////////// NVIDIA //////////////
396 if (is_NVIDIA(prop.vendor)) {
397 bool cuda_match_found = false;
398 if (nvidia.have_cuda) {
399 // Mac OpenCL does not recognize all NVIDIA GPUs returned by
400 // CUDA but we assume that OpenCL and CUDA return devices
401 // with identical model name strings and that OpenCL returns
402 // devices in order of acending PCI slot.
403 //
404 // On other systems, assume OpenCL and CUDA return devices
405 // in the same order.
406 //
407 int saved_CUDA_index = current_CUDA_index;
408
409 while (1) {
410 if (current_CUDA_index >= (int)(nvidia_gpus.size())) {
411 snprintf(buf, sizeof(buf),
412 "OpenCL NVIDIA index #%d does not match any CUDA device",
413 device_index
414 );
415 warnings.push_back(buf);
416 // Newer versions of CUDA driver don't support older NVIDIA GPUs
417 if (nvidia.cuda_version >= 6050) {
418 prop.device_num = (int)(nvidia_opencls.size());
419 current_CUDA_index = saved_CUDA_index;
420 prop.warn_bad_cuda = true;
421 break;
422 } else {
423 // Older CUDA drivers should report all NVIDIA GPUs reported by OpenCL
424 goto leave; // Should never happen
425 }
426 }
427 if (!strcmp(prop.name,
428 nvidia_gpus[devnums_pci_slot_sort[current_CUDA_index]].prop.name)
429 ) {
430 cuda_match_found = true;
431 prop.device_num = devnums_pci_slot_sort[current_CUDA_index];
432 break; // We have a match
433 }
434 // This CUDA GPU is not recognized by OpenCL,
435 // so try the next
436 //
437 ++current_CUDA_index;
438 }
439 } else {
440 prop.device_num = (int)(nvidia_opencls.size());
441 }
442 prop.opencl_device_index = device_index;
443
444 if (cuda_match_found) {
445 prop.peak_flops = nvidia_gpus[prop.device_num].peak_flops;
446 } else {
447 COPROC_NVIDIA c;
448 c.opencl_prop = prop;
449 c.set_peak_flops();
450 prop.peak_flops = c.peak_flops;
451 }
452 if (cuda_match_found) {
453 // Assumes OpenCL device_num and CUDA device_num now match
454 //
455 prop.opencl_available_ram = nvidia_gpus[prop.device_num].available_ram;
456 } else {
457 prop.opencl_available_ram = prop.global_mem_size;
458 }
459
460 // Build nvidia_opencls vector in device_num order
461 for (it=nvidia_opencls.begin(); it != nvidia_opencls.end(); ++it) {
462 if (it->device_num > prop.device_num) break;
463 }
464 nvidia_opencls.insert(it, prop);
465
466 if (cuda_match_found) ++current_CUDA_index;
467 }
468
469 //////////// AMD / ATI //////////////
470 else if (is_AMD(prop.vendor)) {
471 prop.opencl_device_index = device_index;
472
473 if (ati.have_cal) {
474 // AMD OpenCL does not recognize all AMD GPUs returned by
475 // CAL but we assume that OpenCL and CAL return devices in
476 // the same order. See additional comments earlier in
477 // this source file for more details.
478 //
479 while (1) {
480 if (current_CAL_index >= num_CAL_devices) {
481 snprintf(buf, sizeof(buf),
482 "OpenCL ATI device #%d does not match any CAL device",
483 device_index
484 );
485 warnings.push_back(buf);
486 goto leave; // Should never happen
487 }
488 if ((int)ati_gpus[current_CAL_index].attribs.target >= min_CAL_target) {
489 break; // We have a match
490 }
491 // This CAL GPU is not recognized by OpenCL,
492 // so try the next
493 //
494 ++current_CAL_index;
495 }
496 prop.device_num = current_CAL_index++;
497
498 // Always use GPU model name from CAL if
499 // available for ATI / AMD GPUs because
500 // (we believe) it is more user-friendly.
501 //
502 safe_strcpy(prop.name, ati_gpus[prop.device_num].name);
503
504 // Work around a bug in OpenCL which returns only
505 // 1/2 of total global RAM size: use the value from CAL.
506 // This bug applies only to ATI GPUs, not to NVIDIA
507 // See also further workaround code for Macs.
508 //
509 prop.global_mem_size = ati_gpus[prop.device_num].attribs.localRAM * MEGA;
510 prop.peak_flops = ati_gpus[prop.device_num].peak_flops;
511 } else { // ! ati.have_cal
512 prop.device_num = (int)(ati_opencls.size());
513 COPROC_ATI c;
514 c.opencl_prop = prop;
515 c.set_peak_flops();
516 prop.peak_flops = c.peak_flops;
517 }
518
519 if (ati_gpus.size()) {
520 prop.opencl_available_ram = ati_gpus[prop.device_num].available_ram;
521 } else {
522 prop.opencl_available_ram = prop.global_mem_size;
523 }
524 ati_opencls.push_back(prop);
525 }
526
527 //////////// INTEL GPU //////////////
528 else if (is_intel(prop.vendor)) {
529 prop.device_num = (int)(intel_gpu_opencls.size());
530 prop.opencl_device_index = device_index;
531
532 COPROC_INTEL c;
533 c.opencl_prop = prop;
534 c.is_used = COPROC_UNUSED;
535 c.available_ram = prop.global_mem_size;
536 safe_strcpy(c.name, prop.name);
537 safe_strcpy(c.version, prop.opencl_driver_version);
538
539 c.set_peak_flops();
540 prop.peak_flops = c.peak_flops;
541 prop.opencl_available_ram = prop.global_mem_size;
542
543 intel_gpu_opencls.push_back(prop);
544
545 // At present Intel GPUs only support OpenCL
546 // and do not have a native GPGPU framework,
547 // so treat each detected Intel OpenCL GPU device as
548 // a native device.
549 //
550 intel_gpus.push_back(c);
551 } else {
552 //////////// OTHER GPU OR ACCELERATOR //////////////
553 // Put each coprocessor instance into a separate other_opencls element
554
555 // opencl_device_index is passed to project apps via init_data.xml
556 // to differentiate among OpenCL devices from the same vendor. It is
557 // used by boinc_get_opencl_ids() to select the correct OpenCL device.
558 int opencl_device_index = 0;
559 for (unsigned int coproc_index=0; coproc_index<other_opencls.size(); coproc_index++) {
560 if (!strcmp(other_opencls[coproc_index].vendor, prop.vendor)) {
561 opencl_device_index++; // Another OpenCL device from same vendor
562 }
563 }
564
565 prop.device_num = 0; // Each vector entry has only one device
566 prop.opencl_device_index = opencl_device_index;
567 prop.opencl_available_ram = prop.global_mem_size;
568 prop.is_used = COPROC_USED;
569
570 // TODO: Find a better way to calculate / estimate peak_flops for future coprocessors?
571 prop.peak_flops = 0;
572 if (prop.max_compute_units) {
573 prop.peak_flops = prop.max_compute_units * prop.max_clock_frequency * MEGA;
574 }
575 if (prop.peak_flops <= 0) prop.peak_flops = 45e9;
576
577 other_opencls.push_back(prop);
578 }
579 }
580 }
581
582 // Neither nvidia.count, ati.count nor intel_gpu.count have been set yet,
583 // so we can't test have_nvidia(), have_ati() or have_intel_gpu() here.
584 //
585 if ((nvidia_opencls.size() > 0) || nvidia.have_cuda) max_other_coprocs--;
586 if ((ati_opencls.size() > 0) || ati.have_cal) max_other_coprocs--;
587 if (intel_gpu_opencls.size() > 0) max_other_coprocs--;
588 if ((int)other_opencls.size() > max_other_coprocs) {
589 warnings.push_back("Too many OpenCL device types found");
590 }
591
592
593 #ifdef __APPLE__
594 // Work around a bug in OpenCL which returns only
595 // 1/2 of total global RAM size.
596 // This bug applies only to ATI GPUs, not to NVIDIA
597 // This has already been fixed on latest Catalyst
598 // drivers, but Mac does not use Catalyst drivers.
599 if (ati_opencls.size() > 0) {
600 opencl_get_ati_mem_size_from_opengl(warnings);
601 }
602 #endif
603
604 if ((nvidia_opencls.size() == 0) &&
605 (ati_opencls.size() == 0) &&
606 (intel_gpu_opencls.size() == 0) &&
607 (cpu_opencls.size() == 0) &&
608 (other_opencls.size() == 0)
609 ) {
610 warnings.push_back(
611 "OpenCL library present but no OpenCL-capable devices found"
612 );
613 }
614 leave:
615 #ifdef _WIN32
616 if (opencl_lib) FreeLibrary(opencl_lib);
617 #else
618 if (opencl_lib) dlclose(opencl_lib);
619 #endif
620 }
621
correlate_opencl(bool use_all,IGNORE_GPU_INSTANCE & ignore_gpu_instance)622 void COPROCS::correlate_opencl(
623 bool use_all,
624 IGNORE_GPU_INSTANCE& ignore_gpu_instance
625 ) {
626 if (nvidia_opencls.size() > 0) {
627 if (nvidia.have_cuda) { // If CUDA already found the "best" NVIDIA GPU
628 nvidia.merge_opencl(
629 nvidia_opencls, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU]
630 );
631 } else {
632 nvidia.find_best_opencls(
633 use_all, nvidia_opencls, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU]
634 );
635 nvidia.prop.totalGlobalMem = nvidia.opencl_prop.global_mem_size;
636 nvidia.available_ram = nvidia.opencl_prop.global_mem_size;
637 nvidia.prop.clockRate = nvidia.opencl_prop.max_clock_frequency * 1000;
638 safe_strcpy(nvidia.prop.name, nvidia.opencl_prop.name);
639 }
640 }
641
642 if (ati_opencls.size() > 0) {
643 if (ati.have_cal) { // If CAL already found the "best" CAL GPU
644 ati.merge_opencl(ati_opencls, ignore_gpu_instance[PROC_TYPE_AMD_GPU]);
645 } else {
646 ati.find_best_opencls(use_all, ati_opencls, ignore_gpu_instance[PROC_TYPE_AMD_GPU]);
647 ati.attribs.localRAM = ati.opencl_prop.global_mem_size/MEGA;
648 ati.available_ram = ati.opencl_prop.global_mem_size;
649 ati.attribs.engineClock = ati.opencl_prop.max_clock_frequency;
650 safe_strcpy(ati.name, ati.opencl_prop.name);
651 }
652 }
653
654 if (intel_gpu_opencls.size() > 0) {
655 intel_gpu.find_best_opencls(use_all, intel_gpu_opencls, ignore_gpu_instance[PROC_TYPE_INTEL_GPU]);
656 intel_gpu.available_ram = intel_gpu.opencl_prop.global_mem_size;
657 safe_strcpy(intel_gpu.name, intel_gpu.opencl_prop.name);
658 }
659 }
660
get_opencl_info(OPENCL_DEVICE_PROP & prop,cl_uint device_index,vector<string> & warnings)661 cl_int COPROCS::get_opencl_info(
662 OPENCL_DEVICE_PROP& prop,
663 cl_uint device_index,
664 vector<string>&warnings
665 ) {
666 cl_int ciErrNum;
667 char buf[256];
668
669 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_NAME, sizeof(prop.name), prop.name, NULL);
670 if ((ciErrNum != CL_SUCCESS) || (prop.name[0] == 0)) {
671 snprintf(buf, sizeof(buf),
672 "clGetDeviceInfo failed to get name for device %d",
673 (int)device_index
674 );
675 warnings.push_back(buf);
676 return ciErrNum;
677 }
678
679 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR, sizeof(prop.vendor), prop.vendor, NULL);
680 if ((ciErrNum != CL_SUCCESS) || (prop.vendor[0] == 0)) {
681 snprintf(buf, sizeof(buf),
682 "clGetDeviceInfo failed to get vendor for device %d",
683 (int)device_index
684 );
685 warnings.push_back(buf);
686 return ciErrNum;
687 }
688
689 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR_ID, sizeof(prop.vendor_id), &prop.vendor_id, NULL);
690 if (ciErrNum != CL_SUCCESS) {
691 snprintf(buf, sizeof(buf),
692 "clGetDeviceInfo failed to get vendor ID for device %d",
693 (int)device_index
694 );
695 warnings.push_back(buf);
696 return ciErrNum;
697 }
698
699 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_AVAILABLE, sizeof(prop.available), &prop.available, NULL);
700 if (ciErrNum != CL_SUCCESS) {
701 snprintf(buf, sizeof(buf),
702 "clGetDeviceInfo failed to get availability for device %d",
703 (int)device_index
704 );
705 warnings.push_back(buf);
706 return ciErrNum;
707 }
708
709 ciErrNum = (*p_clGetDeviceInfo)(
710 prop.device_id, CL_DEVICE_HALF_FP_CONFIG,
711 sizeof(prop.half_fp_config), &prop.half_fp_config, NULL
712 );
713 if (ciErrNum != CL_SUCCESS) {
714 if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) {
715 prop.half_fp_config = 0; // Not supported by OpenCL 1.0
716 } else {
717 snprintf(buf, sizeof(buf),
718 "clGetDeviceInfo failed to get half-precision floating point capabilities for device %d",
719 (int)device_index
720 );
721 warnings.push_back(buf);
722 return ciErrNum;
723 }
724 }
725
726 ciErrNum = (*p_clGetDeviceInfo)(
727 prop.device_id, CL_DEVICE_SINGLE_FP_CONFIG,
728 sizeof(prop.single_fp_config), &prop.single_fp_config, NULL
729 );
730 if (ciErrNum != CL_SUCCESS) {
731 snprintf(buf, sizeof(buf),
732 "clGetDeviceInfo failed to get single-precision floating point capabilities for device %d",
733 (int)device_index
734 );
735 warnings.push_back(buf);
736 return ciErrNum;
737 }
738
739 ciErrNum = (*p_clGetDeviceInfo)(
740 prop.device_id, CL_DEVICE_DOUBLE_FP_CONFIG,
741 sizeof(prop.double_fp_config), &prop.double_fp_config, NULL
742 );
743 if (ciErrNum != CL_SUCCESS) {
744 if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) {
745 prop.double_fp_config = 0; // Not supported by OpenCL 1.0
746 } else {
747 snprintf(buf, sizeof(buf),
748 "clGetDeviceInfo failed to get double-precision floating point capabilities for device %d",
749 (int)device_index
750 );
751 warnings.push_back(buf);
752 return ciErrNum;
753 }
754 }
755
756 ciErrNum = (*p_clGetDeviceInfo)(
757 prop.device_id, CL_DEVICE_ENDIAN_LITTLE, sizeof(prop.endian_little),
758 &prop.endian_little, NULL
759 );
760 if (ciErrNum != CL_SUCCESS) {
761 snprintf(buf, sizeof(buf),
762 "clGetDeviceInfo failed to get little or big endian for device %d",
763 (int)device_index
764 );
765 warnings.push_back(buf);
766 return ciErrNum;
767 }
768
769 ciErrNum = (*p_clGetDeviceInfo)(
770 prop.device_id, CL_DEVICE_EXECUTION_CAPABILITIES,
771 sizeof(prop.execution_capabilities), &prop.execution_capabilities, NULL
772 );
773 if (ciErrNum != CL_SUCCESS) {
774 snprintf(buf, sizeof(buf),
775 "clGetDeviceInfo failed to get execution capabilities for device %d",
776 (int)device_index
777 );
778 warnings.push_back(buf);
779 return ciErrNum;
780 }
781
782 ciErrNum = (*p_clGetDeviceInfo)(
783 prop.device_id, CL_DEVICE_EXTENSIONS, sizeof(prop.extensions),
784 prop.extensions, NULL
785 );
786 if (ciErrNum != CL_SUCCESS) {
787 snprintf(buf, sizeof(buf),
788 "clGetDeviceInfo failed to get device extensions for device %d",
789 (int)device_index
790 );
791 warnings.push_back(buf);
792 return ciErrNum;
793 }
794
795 ciErrNum = (*p_clGetDeviceInfo)(
796 prop.device_id, CL_DEVICE_GLOBAL_MEM_SIZE,
797 sizeof(prop.global_mem_size), &prop.global_mem_size, NULL
798 );
799 if (ciErrNum != CL_SUCCESS) {
800 snprintf(buf, sizeof(buf),
801 "clGetDeviceInfo failed to get global memory size for device %d",
802 (int)device_index
803 );
804 warnings.push_back(buf);
805 return ciErrNum;
806 }
807
808 ciErrNum = (*p_clGetDeviceInfo)(
809 prop.device_id, CL_DEVICE_LOCAL_MEM_SIZE,
810 sizeof(prop.local_mem_size), &prop.local_mem_size, NULL
811 );
812 if (ciErrNum != CL_SUCCESS) {
813 snprintf(buf, sizeof(buf),
814 "clGetDeviceInfo failed to get local memory size for device %d",
815 (int)device_index
816 );
817 warnings.push_back(buf);
818 return ciErrNum;
819 }
820
821 ciErrNum = (*p_clGetDeviceInfo)(
822 prop.device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY,
823 sizeof(prop.max_clock_frequency), &prop.max_clock_frequency, NULL
824 );
825 if (ciErrNum != CL_SUCCESS) {
826 snprintf(buf, sizeof(buf),
827 "clGetDeviceInfo failed to get max clock frequency for device %d",
828 (int)device_index
829 );
830 warnings.push_back(buf);
831 return ciErrNum;
832 }
833
834 ciErrNum = (*p_clGetDeviceInfo)(
835 prop.device_id, CL_DEVICE_MAX_COMPUTE_UNITS,
836 sizeof(prop.max_compute_units), &prop.max_compute_units, NULL
837 );
838 if (ciErrNum != CL_SUCCESS) {
839 snprintf(buf, sizeof(buf),
840 "clGetDeviceInfo failed to get max compute units for device %d",
841 (int)device_index
842 );
843 warnings.push_back(buf);
844 return ciErrNum;
845 }
846
847 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VERSION, sizeof(prop.opencl_device_version), prop.opencl_device_version, NULL);
848 if (ciErrNum != CL_SUCCESS) {
849 snprintf(buf, sizeof(buf),
850 "clGetDeviceInfo failed to get OpenCL version supported by device %d",
851 (int)device_index
852 );
853 warnings.push_back(buf);
854 return ciErrNum;
855 }
856
857 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DRIVER_VERSION, sizeof(prop.opencl_driver_version), prop.opencl_driver_version, NULL);
858 if (ciErrNum != CL_SUCCESS) {
859 snprintf(buf, sizeof(buf),
860 "clGetDeviceInfo failed to get OpenCL driver version for device %d",
861 (int)device_index
862 );
863 warnings.push_back(buf);
864 return ciErrNum;
865 }
866
867 // Nvidia Specific Extensions
868 if (strstr(prop.extensions, "cl_nv_device_attribute_query") != NULL) {
869
870 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(prop.nv_compute_capability_major), &prop.nv_compute_capability_major, NULL);
871 if (ciErrNum != CL_SUCCESS) {
872 snprintf(buf, sizeof(buf),
873 "clGetDeviceInfo failed to get CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV for device %d",
874 (int)device_index
875 );
876 warnings.push_back(buf);
877 return ciErrNum;
878 }
879
880 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(prop.nv_compute_capability_minor), &prop.nv_compute_capability_minor, NULL);
881 if (ciErrNum != CL_SUCCESS) {
882 snprintf(buf, sizeof(buf),
883 "clGetDeviceInfo failed to get CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV for device %d",
884 (int)device_index
885 );
886 warnings.push_back(buf);
887 return ciErrNum;
888 }
889
890 }
891
892 // AMD Specific Extensions
893 if (strstr(prop.extensions, "cl_amd_device_attribute_query") != NULL) {
894
895 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_BOARD_NAME_AMD, sizeof(buf), buf, NULL);
896 if (strlen(buf) && ciErrNum == CL_SUCCESS) {
897 safe_strcpy(prop.name, buf);
898 } else if (ciErrNum != CL_SUCCESS) {
899 snprintf(buf, sizeof(buf),
900 "clGetDeviceInfo failed to get AMD Board Name for device %d",
901 (int)device_index
902 );
903 warnings.push_back(buf);
904 return ciErrNum;
905 }
906
907 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, sizeof(prop.amd_simd_per_compute_unit), &prop.amd_simd_per_compute_unit, NULL);
908 if (ciErrNum != CL_SUCCESS) {
909 snprintf(buf, sizeof(buf),
910 "clGetDeviceInfo failed to get CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD for device %d",
911 (int)device_index
912 );
913 warnings.push_back(buf);
914 return ciErrNum;
915 }
916
917 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_WIDTH_AMD, sizeof(prop.amd_simd_width), &prop.amd_simd_width, NULL);
918 if (ciErrNum != CL_SUCCESS) {
919 snprintf(buf, sizeof(buf),
920 "clGetDeviceInfo failed to get CL_DEVICE_SIMD_WIDTH_AMD for device %d",
921 (int)device_index
922 );
923 warnings.push_back(buf);
924 return ciErrNum;
925 }
926
927 ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(prop.amd_simd_instruction_width), &prop.amd_simd_instruction_width, NULL);
928 if (ciErrNum != CL_SUCCESS) {
929 snprintf(buf, sizeof(buf),
930 "clGetDeviceInfo failed to get CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD for device %d",
931 (int)device_index
932 );
933 warnings.push_back(buf);
934 return ciErrNum;
935 }
936
937 }
938
939 return CL_SUCCESS;
940 }
941
942 // This is called for ATI GPUs with CAL or NVIDIA GPUs with CUDA, to merge
943 // the OpenCL info into the CAL or CUDA data for the "best" CAL or CUDA GPU.
944 // This assumes that, for each GPU, we have previously correlated its CAL
945 // or CUDA device_num with its opencl_device_index.
946 //
merge_opencl(vector<OPENCL_DEVICE_PROP> & opencls,vector<int> & ignore_dev)947 void COPROC::merge_opencl(
948 vector<OPENCL_DEVICE_PROP> &opencls,
949 vector<int>& ignore_dev
950 ) {
951 unsigned int i, j;
952
953 for (i=0; i<opencls.size(); i++) {
954 opencls[i].is_used = COPROC_UNUSED;
955
956 if (in_vector(opencls[i].device_num, ignore_dev)) {
957 opencls[i].is_used = COPROC_IGNORED;
958 continue;
959 }
960 if (device_num == opencls[i].device_num) {
961 opencl_prop = opencls[i];
962 opencl_device_ids[0] = opencls[i].device_id;
963 have_opencl = true;
964 break;
965 }
966 }
967
968 opencl_device_count = 0;
969
970 // Fill in info for other GPUs which CAL or CUDA found equivalent to best
971 //
972 for (i=0; i<(unsigned int)count; ++i) {
973 for (j=0; j<opencls.size(); j++) {
974 if (device_nums[i] == opencls[j].device_num) {
975 opencls[j].is_used = COPROC_USED;
976 opencl_device_indexes[opencl_device_count] = opencls[j].opencl_device_index;
977 opencl_device_ids[opencl_device_count++] = opencls[j].device_id;
978 instance_has_opencl[i] = true;
979 }
980 }
981 }
982 }
983
984 // This is called for ATI GPUs without CAL or NVIDIA GPUs without CUDA
985 //
find_best_opencls(bool use_all,vector<OPENCL_DEVICE_PROP> & opencls,vector<int> & ignore_dev)986 void COPROC::find_best_opencls(
987 bool use_all,
988 vector<OPENCL_DEVICE_PROP> &opencls,
989 vector<int>& ignore_dev
990 ) {
991 unsigned int i;
992
993 // identify the most capable ATI, NVIDIA or Intel OpenCL GPU
994 //
995 bool first = true;
996 for (i=0; i<opencls.size(); i++) {
997 if (in_vector(opencls[i].device_num, ignore_dev)) {
998 opencls[i].is_used = COPROC_IGNORED;
999 continue;
1000 }
1001 bool is_best = false;
1002 if (first) {
1003 is_best = true;
1004 first = false;
1005 } else if (opencl_compare(opencls[i], opencl_prop, false) > 0) {
1006 is_best = true;
1007 }
1008 if (is_best) {
1009 // fill in what info we have
1010 opencl_prop = opencls[i];
1011 device_num = opencls[i].device_num;
1012 peak_flops = opencls[i].peak_flops;
1013 have_opencl = true;
1014 }
1015 }
1016
1017 // see which other instances are equivalent, and set the count,
1018 // device_nums, opencl_device_count and opencl_device_ids fields
1019 //
1020 count = 0;
1021 opencl_device_count = 0;
1022 for (i=0; i<opencls.size(); i++) {
1023 if (in_vector(opencls[i].device_num, ignore_dev)) {
1024 opencls[i].is_used = COPROC_IGNORED;
1025 continue;
1026 }
1027 if (use_all || !opencl_compare(opencls[i], opencl_prop, true)) {
1028 instance_has_opencl[count] = true;
1029 device_nums[count++] = opencls[i].device_num;
1030 opencl_device_indexes[opencl_device_count] = opencls[i].opencl_device_index;
1031 opencl_device_ids[opencl_device_count++] = opencls[i].device_id;
1032 opencls[i].is_used = COPROC_USED;
1033 }
1034 }
1035 }
1036
fake_opencl_gpu(char * type)1037 void fake_opencl_gpu(char* type) {
1038 OPENCL_DEVICE_PROP op;
1039 op.clear();
1040 strcpy(op.name, type);
1041 strcpy(op.vendor, "ARM");
1042 op.vendor_id = 102760464;
1043 op.available = 1;
1044 op.half_fp_config = 63;
1045 op.single_fp_config = 63;
1046 op.double_fp_config = 63;
1047 op.endian_little = 1;
1048 op.execution_capabilities = 1;
1049 strcpy(op.extensions, "cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_byte_addressable_store cl_khr_3d_image_writes cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics cl_khr_fp16 cl_khr_gl_sharing cl_khr_icd cl_khr_egl_event cl_khr_egl_image cl_khr_image2d_from_buffer cl_arm_core_id cl_arm_printf cl_arm_thread_limit_hint cl_arm_non_uniform_work_group_size cl_arm_import_memory");
1050 op.global_mem_size = 2086998016;
1051 op.local_mem_size = 32768;
1052 op.max_clock_frequency = 600;
1053 op.max_compute_units = 2;
1054 strcpy(op.opencl_platform_version, "OpenCL 1.2 v1.r14p0-01rel0.0fe2d25ca074016740f8ab3fb451b151");
1055 strcpy(op.opencl_device_version, "OpenCL 1.2 v1.r14p0-01rel0.0fe2d25ca074016740f8ab3fb451b151");
1056 strcpy(op.opencl_driver_version, "1.2");
1057 op.is_used = COPROC_USED;
1058 other_opencls.push_back(op);
1059 }
1060
1061 #ifdef __APPLE__
1062 // OpenCL returns incorrect total RAM size for some
1063 // ATI GPUs so we get that info from OpenGL on Macs
1064
1065 #include <OpenGL/OpenGL.h>
1066 #include <OpenGL/gl.h>
1067 #include <OpenGL/glu.h>
1068 #include <Carbon/Carbon.h>
1069 #include <IOKit/graphics/IOGraphicsLib.h>
1070
1071 static io_service_t IOServicePortFromCGDisplayID(CGDirectDisplayID displayID);
1072
opencl_get_ati_mem_size_from_opengl(vector<string> & warnings)1073 void COPROCS::opencl_get_ati_mem_size_from_opengl(vector<string>& warnings) {
1074 CGLRendererInfoObj info;
1075 long i, j;
1076 GLint numRenderers = 0, rv = 0, deviceVRAM, rendererID;
1077 cl_ulong deviceMemSize;
1078 CGLError theErr2 = kCGLNoError;
1079 CGLContextObj curr_ctx = CGLGetCurrentContext (); // save current CGL context
1080 int ati_gpu_index = 0;
1081 GLint rendererIDs[32];
1082 CFDataRef modelName[32];
1083 char opencl_name[256], iokit_name[256], buf[256];
1084 char *p;
1085
1086 if (log_flags.coproc_debug) {
1087
1088 for (i=0; i<32; ++i) {
1089 rendererIDs[i] = 0;
1090 modelName[i] = NULL;
1091
1092 CGOpenGLDisplayMask myMask = 1 << i;
1093 CGDirectDisplayID displayID = CGOpenGLDisplayMaskToDisplayID(myMask);
1094 theErr2 = CGLQueryRendererInfo(myMask, &info, &numRenderers);
1095 if ((displayID != kCGNullDirectDisplay) && (theErr2 == kCGLNoError)) {
1096 // Get the I/O Kit service port for the display
1097 // io_registry_entry_t dspPort = CGDisplayIOServicePort(displayID); // Deprecated in OS 10.9
1098 io_registry_entry_t dspPort = IOServicePortFromCGDisplayID(displayID);
1099
1100 for (j = 0; j < numRenderers; j++) {
1101 // find accelerated renderer (assume only one)
1102 CGLDescribeRenderer (info, j, kCGLRPAcceleratedCompute, &rv);
1103 if (true == rv) { // if openCL-capable
1104 // what is the renderer ID
1105 CGLDescribeRenderer (info, j, kCGLRPRendererID, &rendererIDs[i]);
1106 modelName[i] = (CFDataRef)IORegistryEntrySearchCFProperty(
1107 dspPort,
1108 kIOServicePlane, CFSTR("model"), kCFAllocatorDefault,
1109 kIORegistryIterateRecursively | kIORegistryIterateParents
1110 );
1111 }
1112 if (modelName[i] != NULL) break;
1113 }
1114 }
1115 }
1116 } // End if (log_flags.coproc_debug) {
1117
1118 theErr2 = CGLQueryRendererInfo( 0xffffffff, &info, &numRenderers);
1119 if (theErr2 == kCGLNoError) {
1120 CGLDescribeRenderer (info, 0, kCGLRPRendererCount, &numRenderers);
1121 for (i = 0; i < numRenderers; i++) {
1122 if (ati_gpu_index >= (int)ati_opencls.size()) {
1123 break;
1124 }
1125
1126 CGLDescribeRenderer (info, i, kCGLRPAcceleratedCompute, &rv);
1127 if (true == rv) { // if openCL-capable
1128 // what is the renderer ID
1129 CGLDescribeRenderer (info, i, kCGLRPRendererID, &rendererID);
1130 // what is the VRAM?
1131 CGLError notAvail = CGLDescribeRenderer (info, i, kCGLRPVideoMemoryMegabytes, &deviceVRAM);
1132 if (notAvail == kCGLNoError) {
1133 deviceMemSize = ((cl_ulong)deviceVRAM) * (1024L*1024L);
1134 } else { // kCGLRPVideoMemoryMegabytes is not available before OS 10.7
1135 #pragma clang diagnostic push
1136 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
1137 // kCGLRPVideoMemory=120 is deprecated in OS 10.7 and may not be
1138 // defined in later SDKs, so use a literal value here instead
1139 // CGLDescribeRenderer (info, i, kCGLRPVideoMemory, &deviceVRAM);
1140 CGLDescribeRenderer (info, i, (CGLRendererProperty)120, &deviceVRAM);
1141 deviceMemSize = deviceVRAM;
1142 #pragma clang diagnostic pop
1143 }
1144
1145 // build context and context specific info
1146 CGLPixelFormatAttribute attribs[] = {
1147 kCGLPFARendererID,
1148 (CGLPixelFormatAttribute)rendererID,
1149 kCGLPFAAllowOfflineRenderers,
1150 (CGLPixelFormatAttribute)0
1151 };
1152 CGLPixelFormatObj pixelFormat = NULL;
1153 GLint numPixelFormats = 0;
1154 CGLContextObj cglContext;
1155
1156 CGLChoosePixelFormat (attribs, &pixelFormat, &numPixelFormats);
1157 if (pixelFormat) {
1158 CGLCreateContext(pixelFormat, NULL, &cglContext);
1159 CGLDestroyPixelFormat (pixelFormat);
1160 CGLSetCurrentContext (cglContext);
1161 if (cglContext) {
1162 // get vendor string from renderer
1163 const GLubyte * strVend = glGetString (GL_VENDOR);
1164 if (is_AMD((char *)strVend)) {
1165 ati_opencls[ati_gpu_index].global_mem_size = deviceMemSize;
1166 ati_opencls[ati_gpu_index].opencl_available_ram = deviceMemSize;
1167
1168 if (log_flags.coproc_debug) {
1169 // For some GPUs, one API returns "ATI" but the other API returns
1170 // "AMD" in the model name, so we normalize both to "AMD"
1171 strlcpy(opencl_name, ati_opencls[ati_gpu_index].name, sizeof(opencl_name));
1172 if ((p = strstr(opencl_name, "ATI")) != NULL) {
1173 *++p='M';
1174 *++p='D';
1175 }
1176
1177 for (j=0; j<32; j++) {
1178 if ((rendererID == rendererIDs[j]) && (modelName[j] != NULL)) {
1179 break;
1180 }
1181 }
1182 if (j < 32) {
1183 strlcpy(iokit_name, (char *)CFDataGetBytePtr(modelName[j]), sizeof(iokit_name));
1184 if ((p = strstr(iokit_name, "ATI")) != NULL) {
1185 *++p='M';
1186 *++p='D';
1187 }
1188 if (strcmp(iokit_name, opencl_name)) {
1189 snprintf(buf, sizeof(buf),
1190 "opencl_get_ati_mem_size_from_opengl model name mismatch: %s vs %s\n",
1191 ati_opencls[ati_gpu_index].name, (char *)CFDataGetBytePtr(modelName[j])
1192 );
1193 warnings.push_back(buf);
1194 }
1195 } else {
1196 // Could not get model name from IOKit, so use renderer name
1197 const GLubyte * strRend = glGetString (GL_RENDERER);
1198 if (strRend != NULL) {
1199 strlcpy(iokit_name, (char *)strRend, sizeof(iokit_name));
1200 if ((p = strstr(iokit_name, "ATI")) != NULL) {
1201 *++p='M';
1202 *++p='D';
1203 }
1204 }
1205
1206 if ((strRend == NULL) ||
1207 (!strstr(iokit_name, opencl_name))) {
1208 snprintf(buf, sizeof(buf),
1209 "opencl_get_ati_mem_size_from_opengl model name to renderer mismatch: %s vs %s\n",
1210 strRend, ati_opencls[ati_gpu_index].name
1211 );
1212 warnings.push_back(buf);
1213 }
1214 }
1215 } // End if (log_flags.coproc_debug) {
1216
1217 ati_gpu_index++;
1218 } // End if ATI / AMD GPU
1219
1220 CGLDestroyContext (cglContext);
1221 } else {
1222 warnings.push_back(
1223 "opencl_get_ati_mem_size_from_opengl failed to create context\n"
1224 );
1225 }
1226 } else {
1227 warnings.push_back(
1228 "opencl_get_ati_mem_size_from_opengl failed to create PixelFormat\n"
1229 );
1230 }
1231 } // End if kCGLRPAcceleratedCompute attribute
1232 } // End loop: for (i = 0; i < numRenderers; i++)
1233 CGLDestroyRendererInfo (info);
1234 }
1235
1236 if (log_flags.coproc_debug) {
1237 for (j=0; j<32; j++) {
1238 if (modelName[j] != NULL) {
1239 CFRelease(modelName[j]);
1240 }
1241 }
1242 }
1243 CGLSetCurrentContext (curr_ctx); // restore current CGL context
1244 }
1245
1246
1247
1248 // The following replaces CGDisplayIOServicePort which is deprecated in OS 10.9
1249 //
1250 //========================================================================
1251 // GLFW 3.1 OS X - www.glfw.org
1252 //------------------------------------------------------------------------
1253 // Copyright (c) 2002-2006 Marcus Geelnard
1254 // Copyright (c) 2006-2010 Camilla Berglund <elmindreda@elmindreda.org>
1255 //
1256 // This software is provided 'as-is', without any express or implied
1257 // warranty. In no event will the authors be held liable for any damages
1258 // arising from the use of this software.
1259 //
1260 // Permission is granted to anyone to use this software for any purpose,
1261 // including commercial applications, and to alter it and redistribute it
1262 // freely, subject to the following restrictions:
1263 //
1264 // 1. The origin of this software must not be misrepresented; you must not
1265 // claim that you wrote the original software. If you use this software
1266 // in a product, an acknowledgment in the product documentation would
1267 // be appreciated but is not required.
1268 //
1269 // 2. Altered source versions must be plainly marked as such, and must not
1270 // be misrepresented as being the original software.
1271 //
1272 // 3. This notice may not be removed or altered from any source
1273 // distribution.
1274 //
1275 //========================================================================
1276
1277 // Returns the io_service_t corresponding to a CG display ID, or 0 on failure.
1278 // The io_service_t should be released with IOObjectRelease when not needed.
1279 //
1280
IOServicePortFromCGDisplayID(CGDirectDisplayID displayID)1281 static io_service_t IOServicePortFromCGDisplayID(CGDirectDisplayID displayID)
1282 {
1283 io_iterator_t iter;
1284 io_service_t serv, servicePort = 0;
1285
1286 CFMutableDictionaryRef matching = IOServiceMatching("IODisplayConnect");
1287
1288 // releases matching for us
1289 kern_return_t err = IOServiceGetMatchingServices(kIOMasterPortDefault,
1290 matching,
1291 &iter);
1292 if (err)
1293 return 0;
1294
1295 while ((serv = IOIteratorNext(iter)) != 0)
1296 {
1297 CFDictionaryRef info;
1298 CFIndex vendorID, productID, serialNumber;
1299 CFNumberRef vendorIDRef, productIDRef, serialNumberRef;
1300 Boolean success;
1301
1302 info = IODisplayCreateInfoDictionary(serv,
1303 kIODisplayOnlyPreferredName);
1304
1305 vendorIDRef = (CFNumberRef)CFDictionaryGetValue(info,
1306 CFSTR(kDisplayVendorID));
1307 productIDRef = (CFNumberRef)CFDictionaryGetValue(info,
1308 CFSTR(kDisplayProductID));
1309 serialNumberRef = (CFNumberRef)CFDictionaryGetValue(info,
1310 CFSTR(kDisplaySerialNumber));
1311
1312 success = CFNumberGetValue(vendorIDRef, kCFNumberCFIndexType,
1313 &vendorID);
1314 success &= CFNumberGetValue(productIDRef, kCFNumberCFIndexType,
1315 &productID);
1316 success &= CFNumberGetValue(serialNumberRef, kCFNumberCFIndexType,
1317 &serialNumber);
1318
1319 if (!success)
1320 {
1321 CFRelease(info);
1322 continue;
1323 }
1324 // If the vendor and product id along with the serial don't match
1325 // then we are not looking at the correct monitor.
1326 // NOTE: The serial number is important in cases where two monitors
1327 // are the exact same.
1328 if (CGDisplayVendorNumber(displayID) != vendorID ||
1329 CGDisplayModelNumber(displayID) != productID ||
1330 CGDisplaySerialNumber(displayID) != serialNumber)
1331 {
1332 CFRelease(info);
1333 continue;
1334 }
1335
1336 // The VendorID, Product ID, and the Serial Number all Match Up!
1337 // Therefore we have found the appropriate display io_service
1338 servicePort = serv;
1339 CFRelease(info);
1340 break;
1341 }
1342
1343 IOObjectRelease(iter);
1344 return servicePort;
1345 }
1346 #endif// __APPLE__
1347