1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2012 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
17 
18 // Detection of GPUs using OpenCL
19 
20 #define TEST_OTHER_COPROC_LOGIC 0
21 
22 #ifdef _WIN32
23 #include "boinc_win.h"
24 #ifdef _MSC_VER
25 #define snprintf _snprintf
26 #endif
27 #else
28 #ifdef __APPLE__
29 // Suppress obsolete warning when building for OS 10.3.9
30 #define DLOPEN_NO_WARN
31 #include <mach-o/dyld.h>
32 #endif
33 #include "config.h"
34 #include <dlfcn.h>
35 #endif
36 
37 #include <vector>
38 #include <string>
39 
40 using std::vector;
41 using std::string;
42 
43 #include "coproc.h"
44 #include "str_replace.h"
45 #include "util.h"
46 
47 #include "client_msgs.h"
48 #include "client_state.h"
49 #include "gpu_detect.h"
50 
// The OpenCL library is loaded at run time by COPROCS::get_opencl()
// rather than linked, so the entry points are reached through the
// function pointers declared below.

#ifdef _WIN32

// Handle returned by LoadLibrary("OpenCL.dll"); NULL until loaded.
HMODULE opencl_lib = NULL;

// Signatures of the OpenCL entry points resolved with GetProcAddress().
typedef cl_int (__stdcall *CL_PLATFORMIDS) (cl_uint, cl_platform_id*, cl_uint*);
typedef cl_int (__stdcall *CL_PLATFORMINFO) (cl_platform_id, cl_platform_info, size_t, void*, size_t*);
typedef cl_int (__stdcall *CL_DEVICEIDS)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*);
typedef cl_int (__stdcall *CL_INFO) (cl_device_id, cl_device_info, size_t, void*, size_t*);

// Pointers to the resolved entry points; NULL until get_opencl() sets them.
CL_PLATFORMIDS  p_clGetPlatformIDs = NULL;
CL_PLATFORMINFO p_clGetPlatformInfo = NULL;
CL_DEVICEIDS    p_clGetDeviceIDs = NULL;
CL_INFO         p_clGetDeviceInfo = NULL;

#else

// Handle returned by dlopen() for the OpenCL library; NULL until loaded.
void* opencl_lib = NULL;

// Pointers to the OpenCL entry points, resolved with dlsym() in get_opencl().

// clGetPlatformIDs
cl_int (*p_clGetPlatformIDs)(
    cl_uint,         // num_entries,
    cl_platform_id*, // platforms
    cl_uint *        // num_platforms
);
// clGetPlatformInfo
cl_int (*p_clGetPlatformInfo)(
    cl_platform_id,  // platform
    cl_platform_info, // param_name
    size_t,          // param_value_size
    void*,           // param_value
    size_t*          // param_value_size_ret
);
// clGetDeviceIDs
cl_int (*p_clGetDeviceIDs)(
    cl_platform_id,  // platform
    cl_device_type,  // device_type
    cl_uint,         // num_entries
    cl_device_id*,   // devices
    cl_uint*         // num_devices
);
// clGetDeviceInfo
cl_int (*p_clGetDeviceInfo)(
    cl_device_id,    // device
    cl_device_info,  // param_name
    size_t,          // param_value_size
    void*,           // param_value
    size_t*          // param_value_size_ret
);

#endif
97 
// Return true if the vendor string names an AMD / ATI device.
// The substring match is case-sensitive.
//
static bool is_AMD(char *vendor) {
    static const char* const amd_markers[] = {
        "ATI",
        "AMD",
        "Advanced Micro Devices, Inc."
    };
    const size_t marker_count = sizeof(amd_markers) / sizeof(amd_markers[0]);

    for (size_t k = 0; k < marker_count; ++k) {
        if (strstr(vendor, amd_markers[k]) != NULL) {
            return true;
        }
    }
    return false;
}
104 
// Return true if the vendor string contains "NVIDIA" (case-sensitive).
//
static bool is_NVIDIA(char* vendor) {
    return strstr(vendor, "NVIDIA") != NULL;
}
109 
// Return true if the vendor string contains "intel", ignoring case.
//
static bool is_intel(char* vendor) {
    return strcasestr(vendor, "intel") ? true : false;
}
114 
115 // If "loose", tolerate small diff
116 //
opencl_compare(OPENCL_DEVICE_PROP & c1,OPENCL_DEVICE_PROP & c2,bool loose)117 static int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
118     if (c1.opencl_device_version_int > c2.opencl_device_version_int) return 1;
119     if (c1.opencl_device_version_int < c2.opencl_device_version_int) return -1;
120     if (loose) {
121         if (c1.global_mem_size > 1.4*c2.global_mem_size) return 1;
122         if (c1.global_mem_size < .7*c2.global_mem_size) return -1;
123         return 0;
124     }
125     if (c1.global_mem_size > c2.global_mem_size) return 1;
126     if (c1.global_mem_size < c2.global_mem_size) return -1;
127     if (c1.peak_flops > c2.peak_flops) return 1;
128     if (c1.peak_flops < c2.peak_flops) return -1;
129     return 0;
130 }
131 
132 #ifdef __APPLE__
compare_pci_slots(int NVIDIA_GPU_Index1,int NVIDIA_GPU_Index2)133 static bool compare_pci_slots(int NVIDIA_GPU_Index1, int NVIDIA_GPU_Index2) {
134     if (NVIDIA_GPU_Index1 >= (int)nvidia_gpus.size()) return false;  // Should never happen
135     if (NVIDIA_GPU_Index2 >= (int)nvidia_gpus.size()) return false;  // Should never happen
136     return (
137         nvidia_gpus[NVIDIA_GPU_Index1].pci_info.bus_id <
138                 nvidia_gpus[NVIDIA_GPU_Index2].pci_info.bus_id
139     );
140 }
141 #endif
142 
143 
144 // OpenCL interfaces are documented here:
145 // http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and
146 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/
147 
get_opencl(vector<string> & warnings)148 void COPROCS::get_opencl(
149     vector<string>& warnings
150 ) {
151     cl_int ciErrNum;
152     cl_platform_id platforms[MAX_OPENCL_PLATFORMS];
153     cl_uint num_platforms, platform_index, num_devices, device_index;
154     cl_device_id devices[MAX_COPROC_INSTANCES];
155     char platform_version[256];
156     char platform_vendor[256];
157     char buf[256];
158     OPENCL_DEVICE_PROP prop;
159     int current_CUDA_index;
160     int current_CAL_index;
161     int min_CAL_target;
162     int num_CAL_devices = (int)ati_gpus.size();
163     vector<int>devnums_pci_slot_sort;
164     vector<OPENCL_DEVICE_PROP>::iterator it;
165     int max_other_coprocs = MAX_RSC-1;  // coprocs[0] is reserved for CPU
166 
167     if (cc_config.no_opencl) {
168         return;
169     }
170 
171 #ifdef _WIN32
172     opencl_lib = LoadLibrary("OpenCL.dll");
173     if (!opencl_lib) {
174         warnings.push_back("No OpenCL library found");
175         return;
176     }
177 
178     p_clGetPlatformIDs = (CL_PLATFORMIDS)GetProcAddress( opencl_lib, "clGetPlatformIDs" );
179     p_clGetPlatformInfo = (CL_PLATFORMINFO)GetProcAddress( opencl_lib, "clGetPlatformInfo" );
180     p_clGetDeviceIDs = (CL_DEVICEIDS)GetProcAddress( opencl_lib, "clGetDeviceIDs" );
181     p_clGetDeviceInfo = (CL_INFO)GetProcAddress( opencl_lib, "clGetDeviceInfo" );
182 #else
183 #ifdef __APPLE__
184     opencl_lib = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_NOW);
185 #else
186     opencl_lib = dlopen("libOpenCL.so", RTLD_NOW);
187     if (!opencl_lib) {
188         opencl_lib = dlopen("libOpenCL.so.1", RTLD_NOW);
189     }
190 #endif
191     if (!opencl_lib) {
192         sprintf(buf, "OpenCL: %s", dlerror());
193         warnings.push_back(buf);
194         return;
195     }
196     p_clGetPlatformIDs = (cl_int(*)(cl_uint, cl_platform_id*, cl_uint*)) dlsym( opencl_lib, "clGetPlatformIDs" );
197     p_clGetPlatformInfo = (cl_int(*)(cl_platform_id, cl_platform_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetPlatformInfo" );
198     p_clGetDeviceIDs = (cl_int(*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*)) dlsym( opencl_lib, "clGetDeviceIDs" );
199     p_clGetDeviceInfo = (cl_int(*)(cl_device_id, cl_device_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetDeviceInfo" );
200 #endif
201 
202     if (!p_clGetPlatformIDs) {
203         warnings.push_back("clGetPlatformIDs() missing from OpenCL library");
204         goto leave;
205     }
206     if (!p_clGetPlatformInfo) {
207         warnings.push_back("clGetPlatformInfo() missing from OpenCL library");
208         goto leave;
209     }
210     if (!p_clGetDeviceIDs) {
211         warnings.push_back("clGetDeviceIDs() missing from OpenCL library");
212         goto leave;
213     }
214     if (!p_clGetDeviceInfo) {
215         warnings.push_back("clGetDeviceInfo() missing from OpenCL library");
216         goto leave;
217     }
218 
219     ciErrNum = (*p_clGetPlatformIDs)(MAX_OPENCL_PLATFORMS, platforms, &num_platforms);
220     if ((ciErrNum != CL_SUCCESS) || (num_platforms == 0)) {
221         warnings.push_back("clGetPlatformIDs() failed to return any OpenCL platforms");
222         goto leave;
223     }
224 
225     if (nvidia_gpus.size()) {
226         for (int i=0; i<(int)nvidia_gpus.size(); ++i) {
227             devnums_pci_slot_sort.push_back(i);
228         }
229 #ifdef __APPLE__
230         std::stable_sort(
231             devnums_pci_slot_sort.begin(),
232             devnums_pci_slot_sort.end(),
233             compare_pci_slots
234         );
235 #endif
236     }
237 
238     for (platform_index=0; platform_index<num_platforms; ++platform_index) {
239         ciErrNum = (*p_clGetPlatformInfo)(
240             platforms[platform_index], CL_PLATFORM_VERSION,
241             sizeof(platform_version), &platform_version, NULL
242         );
243         if (ciErrNum != CL_SUCCESS) {
244             snprintf(buf, sizeof(buf),
245                 "Couldn't get PLATFORM_VERSION for platform #%d; error %d",
246                 platform_index, ciErrNum
247             );
248             warnings.push_back(buf);
249             continue;
250         }
251 
252         ciErrNum = (*p_clGetPlatformInfo)(
253             platforms[platform_index], CL_PLATFORM_VENDOR,
254             sizeof(platform_vendor), &platform_vendor, NULL
255         );
256         if (ciErrNum != CL_SUCCESS) {
257             snprintf(buf, sizeof(buf),
258                 "Couldn't get PLATFORM_VENDOR for platform #%d; error %d",
259                 platform_index, ciErrNum
260             );
261             warnings.push_back(buf);
262         }
263 
264         //////////// CPU //////////////
265 
266         ciErrNum = (*p_clGetDeviceIDs)(
267             platforms[platform_index], (CL_DEVICE_TYPE_CPU),
268             MAX_COPROC_INSTANCES, devices, &num_devices
269         );
270 
271         if ((ciErrNum != CL_SUCCESS) && (num_devices != 0)) {
272             num_devices = 0;                 // No devices
273             if (ciErrNum != CL_DEVICE_NOT_FOUND) {
274                 snprintf(buf, sizeof(buf),
275                     "Couldn't get CPU Device IDs for platform #%d: error %d",
276                     platform_index, ciErrNum
277                 );
278                 warnings.push_back(buf);
279             }
280         }
281 
282         for (device_index=0; device_index<num_devices; ++device_index) {
283             memset(&prop, 0, sizeof(prop));
284             prop.device_id = devices[device_index];
285             strlcpy(
286                 prop.opencl_platform_version, platform_version,
287                 sizeof(prop.opencl_platform_version)
288             );
289 
290             ciErrNum = get_opencl_info(prop, device_index, warnings);
291             if (ciErrNum != CL_SUCCESS) continue;
292 
293             prop.is_used = COPROC_UNUSED;
294             prop.get_device_version_int();
295 
296             OPENCL_CPU_PROP c;
297             strlcpy(c.platform_vendor, platform_vendor, sizeof(c.platform_vendor));
298             c.opencl_prop = prop;
299             cpu_opencls.push_back(c);
300         }
301 
302         //////////// GPUs and Accelerators //////////////
303 
304         ciErrNum = (*p_clGetDeviceIDs)(
305             platforms[platform_index],
306             (CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR),
307             MAX_COPROC_INSTANCES, devices, &num_devices
308         );
309 
310         if (ciErrNum == CL_DEVICE_NOT_FOUND) continue;  // No devices
311         if (num_devices == 0) continue;                 // No devices
312 
313         if (ciErrNum != CL_SUCCESS) {
314             snprintf(buf, sizeof(buf),
315                 "Couldn't get Device IDs for platform #%d: error %d",
316                 platform_index, ciErrNum
317             );
318             warnings.push_back(buf);
319             continue;
320         }
321 
322         // Mac OpenCL does not recognize all NVIDIA GPUs returned by CUDA
323         // Fortunately, CUDA and OpenCL return the same GPU model name on
324         // the Mac, so we can use this to match OpenCL devices with CUDA.
325         //
326         current_CUDA_index = 0;
327 
328         // ATI/AMD OpenCL does not always recognize all GPUs returned by CAL.
329         // This is complicated for several reasons:
330         // * CAL returns only an enum (CALtargetEnum) for the GPU's family,
331         //   not specific model information.
332         // * OpenCL return only the GPU family name
333         // * Which GPUs support OpenCL varies with different versions of the
334         //   AMD Catalyst drivers.
335         //
336         // To deal with this, we make some (probably imperfect) assumptions:
337         // * AMD drivers eliminate OpenCL support for older GPU families first.
338         // * Lower values of CALtargetEnum represent older GPU families.
339         // * All ATI/AMD GPUs reported by OpenCL are also reported by CAL (on
340         //   systems where CAL is available) though the converse may not be true.
341         //
342         current_CAL_index = 0;
343         min_CAL_target = 0;
344         if (is_AMD(platform_vendor) && (num_CAL_devices > 0)) {
345             while (1) {
346                 int numToMatch = 0;
347                 for (int i=0; i<num_CAL_devices; ++i) {
348                     if ((int)ati_gpus[i].attribs.target >= min_CAL_target) {
349                         ++numToMatch;
350                     }
351                 }
352                 if (numToMatch == (int)num_devices) break;
353                 if (numToMatch < (int)num_devices) {
354                     warnings.push_back(
355                         "Could not match ATI OpenCL and CAL GPUs: ignoring CAL."
356                     );
357                     // If we can't match ATI OpenCL and CAL GPUs, ignore CAL
358                     // and keep OpenCL because AMD has deprecated CAL.
359                     ati_gpus.clear();
360                     ati.have_cal = false;
361                     num_CAL_devices = 0;
362                     break;
363                 }
364                 ++min_CAL_target;
365             }
366         }
367 
368         for (device_index=0; device_index<num_devices; ++device_index) {
369             memset(&prop, 0, sizeof(prop));
370             prop.device_id = devices[device_index];
371             strlcpy(
372                 prop.opencl_platform_version, platform_version,
373                 sizeof(prop.opencl_platform_version)
374             );
375 
376 //TODO: Should we store the platform(s) for each GPU found?
377 //TODO: Must we check if multiple platforms found the same GPU and merge the records?
378             ciErrNum = get_opencl_info(prop, device_index, warnings);
379             if (ciErrNum != CL_SUCCESS) continue;
380 
381 // TODO: Eliminate this, or improve it
382 #if TEST_OTHER_COPROC_LOGIC
383             if (is_NVIDIA(prop.vendor)) {
384                 safe_strcpy(prop.vendor, "FAKE VENDOR X");
385             } else if (is_AMD(prop.vendor)) {
386                 safe_strcpy(prop.vendor, "FAKE VENDOR Y");
387             } else {
388                 safe_strcpy(prop.vendor, "FAKE VENDOR Z");
389             }
390 #endif
391 
392             prop.is_used = COPROC_UNUSED;
393             prop.get_device_version_int();
394 
395             //////////// NVIDIA //////////////
396             if (is_NVIDIA(prop.vendor)) {
397                 bool cuda_match_found = false;
398                 if (nvidia.have_cuda) {
399                     // Mac OpenCL does not recognize all NVIDIA GPUs returned by
400                     // CUDA but we assume that OpenCL and CUDA return devices
401                     // with identical model name strings and that OpenCL returns
402                     // devices in order of acending PCI slot.
403                     //
404                     // On other systems, assume OpenCL and CUDA return devices
405                     // in the same order.
406                     //
407                     int saved_CUDA_index = current_CUDA_index;
408 
409                     while (1) {
410                         if (current_CUDA_index >= (int)(nvidia_gpus.size())) {
411                             snprintf(buf, sizeof(buf),
412                                 "OpenCL NVIDIA index #%d does not match any CUDA device",
413                                 device_index
414                             );
415                             warnings.push_back(buf);
416                             // Newer versions of CUDA driver don't support older NVIDIA GPUs
417                             if (nvidia.cuda_version >= 6050) {
418                                 prop.device_num = (int)(nvidia_opencls.size());
419                                 current_CUDA_index = saved_CUDA_index;
420                                 prop.warn_bad_cuda = true;
421                                 break;
422                             } else {
423                                 // Older CUDA drivers should report all NVIDIA GPUs reported by OpenCL
424                                 goto leave; // Should never happen
425                             }
426                         }
427                         if (!strcmp(prop.name,
428                             nvidia_gpus[devnums_pci_slot_sort[current_CUDA_index]].prop.name)
429                             ) {
430                             cuda_match_found = true;
431                             prop.device_num = devnums_pci_slot_sort[current_CUDA_index];
432                             break;  // We have a match
433                         }
434                         // This CUDA GPU is not recognized by OpenCL,
435                         // so try the next
436                         //
437                         ++current_CUDA_index;
438                     }
439                 } else {
440                     prop.device_num = (int)(nvidia_opencls.size());
441                 }
442                 prop.opencl_device_index = device_index;
443 
444                 if (cuda_match_found) {
445                     prop.peak_flops = nvidia_gpus[prop.device_num].peak_flops;
446                 } else {
447                     COPROC_NVIDIA c;
448                     c.opencl_prop = prop;
449                     c.set_peak_flops();
450                     prop.peak_flops = c.peak_flops;
451                 }
452                 if (cuda_match_found) {
453                     // Assumes OpenCL device_num and CUDA device_num now match
454                     //
455                     prop.opencl_available_ram = nvidia_gpus[prop.device_num].available_ram;
456                 } else {
457                     prop.opencl_available_ram = prop.global_mem_size;
458                 }
459 
460                 // Build nvidia_opencls vector in device_num order
461                 for (it=nvidia_opencls.begin(); it != nvidia_opencls.end(); ++it) {
462                     if (it->device_num > prop.device_num) break;
463                 }
464                 nvidia_opencls.insert(it, prop);
465 
466                 if (cuda_match_found) ++current_CUDA_index;
467             }
468 
469             //////////// AMD / ATI //////////////
470             else if (is_AMD(prop.vendor)) {
471                 prop.opencl_device_index = device_index;
472 
473                 if (ati.have_cal) {
474                     // AMD OpenCL does not recognize all AMD GPUs returned by
475                     // CAL but we assume that OpenCL and CAL return devices in
476                     // the same order.  See additional comments earlier in
477                     // this source file for more details.
478                     //
479                     while (1) {
480                         if (current_CAL_index >= num_CAL_devices) {
481                             snprintf(buf, sizeof(buf),
482                                 "OpenCL ATI device #%d does not match any CAL device",
483                                 device_index
484                             );
485                             warnings.push_back(buf);
486                             goto leave; // Should never happen
487                         }
488                         if ((int)ati_gpus[current_CAL_index].attribs.target >= min_CAL_target) {
489                             break;  // We have a match
490                         }
491                         // This CAL GPU is not recognized by OpenCL,
492                         // so try the next
493                         //
494                         ++current_CAL_index;
495                     }
496                     prop.device_num = current_CAL_index++;
497 
498                     // Always use GPU model name from CAL if
499                     // available for ATI / AMD  GPUs because
500                     // (we believe) it is more user-friendly.
501                     //
502                     safe_strcpy(prop.name, ati_gpus[prop.device_num].name);
503 
504                     // Work around a bug in OpenCL which returns only
505                     // 1/2 of total global RAM size: use the value from CAL.
506                     // This bug applies only to ATI GPUs, not to NVIDIA
507                     // See also further workaround code for Macs.
508                     //
509                     prop.global_mem_size = ati_gpus[prop.device_num].attribs.localRAM * MEGA;
510                     prop.peak_flops = ati_gpus[prop.device_num].peak_flops;
511                 } else {            // ! ati.have_cal
512                     prop.device_num = (int)(ati_opencls.size());
513                     COPROC_ATI c;
514                     c.opencl_prop = prop;
515                     c.set_peak_flops();
516                     prop.peak_flops = c.peak_flops;
517                 }
518 
519                 if (ati_gpus.size()) {
520                     prop.opencl_available_ram = ati_gpus[prop.device_num].available_ram;
521                 } else {
522                     prop.opencl_available_ram = prop.global_mem_size;
523                 }
524                 ati_opencls.push_back(prop);
525             }
526 
527             //////////// INTEL GPU //////////////
528             else if (is_intel(prop.vendor)) {
529                 prop.device_num = (int)(intel_gpu_opencls.size());
530                 prop.opencl_device_index = device_index;
531 
532                 COPROC_INTEL c;
533                 c.opencl_prop = prop;
534                 c.is_used = COPROC_UNUSED;
535                 c.available_ram = prop.global_mem_size;
536                 safe_strcpy(c.name, prop.name);
537                 safe_strcpy(c.version, prop.opencl_driver_version);
538 
539                 c.set_peak_flops();
540                 prop.peak_flops = c.peak_flops;
541                 prop.opencl_available_ram = prop.global_mem_size;
542 
543                 intel_gpu_opencls.push_back(prop);
544 
545                 // At present Intel GPUs only support OpenCL
546                 // and do not have a native GPGPU framework,
547                 // so treat each detected Intel OpenCL GPU device as
548                 // a native device.
549                 //
550                 intel_gpus.push_back(c);
551             } else {
552                 //////////// OTHER GPU OR ACCELERATOR //////////////
553                 // Put each coprocessor instance into a separate other_opencls element
554 
555                 // opencl_device_index is passed to project apps via init_data.xml
556                 // to differentiate among OpenCL devices from the same vendor. It is
557                 // used by boinc_get_opencl_ids() to select the correct OpenCL device.
558                 int opencl_device_index = 0;
559                 for (unsigned int coproc_index=0; coproc_index<other_opencls.size(); coproc_index++) {
560                     if (!strcmp(other_opencls[coproc_index].vendor, prop.vendor)) {
561                         opencl_device_index++;  // Another OpenCL device from same vendor
562                     }
563                 }
564 
565                 prop.device_num = 0;    // Each vector entry has only one device
566                 prop.opencl_device_index = opencl_device_index;
567                 prop.opencl_available_ram = prop.global_mem_size;
568                 prop.is_used = COPROC_USED;
569 
570                 // TODO: Find a better way to calculate / estimate peak_flops for future coprocessors?
571                 prop.peak_flops = 0;
572                 if (prop.max_compute_units) {
573                     prop.peak_flops = prop.max_compute_units * prop.max_clock_frequency * MEGA;
574                 }
575                 if (prop.peak_flops <= 0) prop.peak_flops = 45e9;
576 
577                 other_opencls.push_back(prop);
578             }
579         }
580     }
581 
582     // Neither nvidia.count, ati.count nor intel_gpu.count have been set yet,
583     // so we can't test have_nvidia(), have_ati() or have_intel_gpu() here.
584     //
585     if ((nvidia_opencls.size() > 0) || nvidia.have_cuda) max_other_coprocs--;
586     if ((ati_opencls.size() > 0) || ati.have_cal) max_other_coprocs--;
587     if (intel_gpu_opencls.size() > 0) max_other_coprocs--;
588     if ((int)other_opencls.size() > max_other_coprocs) {
589         warnings.push_back("Too many OpenCL device types found");
590     }
591 
592 
593 #ifdef __APPLE__
594     // Work around a bug in OpenCL which returns only
595     // 1/2 of total global RAM size.
596     // This bug applies only to ATI GPUs, not to NVIDIA
597     // This has already been fixed on latest Catalyst
598     // drivers, but Mac does not use Catalyst drivers.
599     if (ati_opencls.size() > 0) {
600         opencl_get_ati_mem_size_from_opengl(warnings);
601     }
602 #endif
603 
604     if ((nvidia_opencls.size() == 0) &&
605         (ati_opencls.size() == 0) &&
606         (intel_gpu_opencls.size() == 0) &&
607         (cpu_opencls.size() == 0) &&
608         (other_opencls.size() == 0)
609     ) {
610         warnings.push_back(
611             "OpenCL library present but no OpenCL-capable devices found"
612         );
613     }
614 leave:
615 #ifdef _WIN32
616     if (opencl_lib) FreeLibrary(opencl_lib);
617 #else
618     if (opencl_lib) dlclose(opencl_lib);
619 #endif
620 }
621 
correlate_opencl(bool use_all,IGNORE_GPU_INSTANCE & ignore_gpu_instance)622 void COPROCS::correlate_opencl(
623     bool use_all,
624     IGNORE_GPU_INSTANCE& ignore_gpu_instance
625 ) {
626     if (nvidia_opencls.size() > 0) {
627         if (nvidia.have_cuda) { // If CUDA already found the "best" NVIDIA GPU
628             nvidia.merge_opencl(
629                 nvidia_opencls, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU]
630             );
631         } else {
632             nvidia.find_best_opencls(
633                 use_all, nvidia_opencls, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU]
634             );
635             nvidia.prop.totalGlobalMem = nvidia.opencl_prop.global_mem_size;
636             nvidia.available_ram = nvidia.opencl_prop.global_mem_size;
637             nvidia.prop.clockRate = nvidia.opencl_prop.max_clock_frequency * 1000;
638             safe_strcpy(nvidia.prop.name, nvidia.opencl_prop.name);
639         }
640     }
641 
642     if (ati_opencls.size() > 0) {
643         if (ati.have_cal) { // If CAL already found the "best" CAL GPU
644             ati.merge_opencl(ati_opencls, ignore_gpu_instance[PROC_TYPE_AMD_GPU]);
645         } else {
646             ati.find_best_opencls(use_all, ati_opencls, ignore_gpu_instance[PROC_TYPE_AMD_GPU]);
647             ati.attribs.localRAM = ati.opencl_prop.global_mem_size/MEGA;
648             ati.available_ram = ati.opencl_prop.global_mem_size;
649             ati.attribs.engineClock = ati.opencl_prop.max_clock_frequency;
650             safe_strcpy(ati.name, ati.opencl_prop.name);
651         }
652     }
653 
654     if (intel_gpu_opencls.size() > 0) {
655         intel_gpu.find_best_opencls(use_all, intel_gpu_opencls, ignore_gpu_instance[PROC_TYPE_INTEL_GPU]);
656         intel_gpu.available_ram = intel_gpu.opencl_prop.global_mem_size;
657         safe_strcpy(intel_gpu.name, intel_gpu.opencl_prop.name);
658     }
659 }
660 
get_opencl_info(OPENCL_DEVICE_PROP & prop,cl_uint device_index,vector<string> & warnings)661 cl_int COPROCS::get_opencl_info(
662     OPENCL_DEVICE_PROP& prop,
663     cl_uint device_index,
664     vector<string>&warnings
665 ) {
666     cl_int ciErrNum;
667     char buf[256];
668 
669     ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_NAME, sizeof(prop.name), prop.name, NULL);
670     if ((ciErrNum != CL_SUCCESS) || (prop.name[0] == 0)) {
671         snprintf(buf, sizeof(buf),
672             "clGetDeviceInfo failed to get name for device %d",
673             (int)device_index
674         );
675         warnings.push_back(buf);
676         return ciErrNum;
677     }
678 
679     ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR, sizeof(prop.vendor), prop.vendor, NULL);
680     if ((ciErrNum != CL_SUCCESS) || (prop.vendor[0] == 0)) {
681         snprintf(buf, sizeof(buf),
682             "clGetDeviceInfo failed to get vendor for device %d",
683             (int)device_index
684         );
685         warnings.push_back(buf);
686         return ciErrNum;
687     }
688 
689     ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR_ID, sizeof(prop.vendor_id), &prop.vendor_id, NULL);
690     if (ciErrNum != CL_SUCCESS) {
691         snprintf(buf, sizeof(buf),
692             "clGetDeviceInfo failed to get vendor ID for device %d",
693             (int)device_index
694         );
695         warnings.push_back(buf);
696         return ciErrNum;
697     }
698 
699     ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_AVAILABLE, sizeof(prop.available), &prop.available, NULL);
700     if (ciErrNum != CL_SUCCESS) {
701         snprintf(buf, sizeof(buf),
702             "clGetDeviceInfo failed to get availability for device %d",
703             (int)device_index
704         );
705         warnings.push_back(buf);
706         return ciErrNum;
707     }
708 
709     ciErrNum = (*p_clGetDeviceInfo)(
710         prop.device_id, CL_DEVICE_HALF_FP_CONFIG,
711         sizeof(prop.half_fp_config), &prop.half_fp_config, NULL
712     );
713     if (ciErrNum != CL_SUCCESS) {
714         if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) {
715             prop.half_fp_config = 0;  // Not supported by OpenCL 1.0
716         } else {
717             snprintf(buf, sizeof(buf),
718                 "clGetDeviceInfo failed to get half-precision floating point capabilities for device %d",
719                 (int)device_index
720             );
721             warnings.push_back(buf);
722             return ciErrNum;
723         }
724     }
725 
726     ciErrNum = (*p_clGetDeviceInfo)(
727         prop.device_id, CL_DEVICE_SINGLE_FP_CONFIG,
728         sizeof(prop.single_fp_config), &prop.single_fp_config, NULL
729     );
730     if (ciErrNum != CL_SUCCESS) {
731         snprintf(buf, sizeof(buf),
732             "clGetDeviceInfo failed to get single-precision floating point capabilities for device %d",
733             (int)device_index
734         );
735         warnings.push_back(buf);
736         return ciErrNum;
737     }
738 
739     ciErrNum = (*p_clGetDeviceInfo)(
740         prop.device_id, CL_DEVICE_DOUBLE_FP_CONFIG,
741         sizeof(prop.double_fp_config), &prop.double_fp_config, NULL
742     );
743     if (ciErrNum != CL_SUCCESS) {
744         if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) {
745             prop.double_fp_config = 0;  // Not supported by OpenCL 1.0
746         } else {
747             snprintf(buf, sizeof(buf),
748                 "clGetDeviceInfo failed to get double-precision floating point capabilities for device %d",
749                 (int)device_index
750             );
751             warnings.push_back(buf);
752             return ciErrNum;
753         }
754     }
755 
756     ciErrNum = (*p_clGetDeviceInfo)(
757         prop.device_id, CL_DEVICE_ENDIAN_LITTLE, sizeof(prop.endian_little),
758         &prop.endian_little, NULL
759     );
760     if (ciErrNum != CL_SUCCESS) {
761         snprintf(buf, sizeof(buf),
762             "clGetDeviceInfo failed to get little or big endian for device %d",
763             (int)device_index
764         );
765         warnings.push_back(buf);
766         return ciErrNum;
767     }
768 
769     ciErrNum = (*p_clGetDeviceInfo)(
770         prop.device_id, CL_DEVICE_EXECUTION_CAPABILITIES,
771         sizeof(prop.execution_capabilities), &prop.execution_capabilities, NULL
772     );
773     if (ciErrNum != CL_SUCCESS) {
774         snprintf(buf, sizeof(buf),
775             "clGetDeviceInfo failed to get execution capabilities for device %d",
776             (int)device_index
777         );
778         warnings.push_back(buf);
779         return ciErrNum;
780     }
781 
782     ciErrNum = (*p_clGetDeviceInfo)(
783         prop.device_id, CL_DEVICE_EXTENSIONS, sizeof(prop.extensions),
784         prop.extensions, NULL
785     );
786     if (ciErrNum != CL_SUCCESS) {
787         snprintf(buf, sizeof(buf),
788             "clGetDeviceInfo failed to get device extensions for device %d",
789             (int)device_index
790         );
791         warnings.push_back(buf);
792         return ciErrNum;
793     }
794 
795     ciErrNum = (*p_clGetDeviceInfo)(
796         prop.device_id, CL_DEVICE_GLOBAL_MEM_SIZE,
797         sizeof(prop.global_mem_size), &prop.global_mem_size, NULL
798     );
799     if (ciErrNum != CL_SUCCESS) {
800         snprintf(buf, sizeof(buf),
801             "clGetDeviceInfo failed to get global memory size for device %d",
802             (int)device_index
803         );
804         warnings.push_back(buf);
805         return ciErrNum;
806     }
807 
808     ciErrNum = (*p_clGetDeviceInfo)(
809         prop.device_id, CL_DEVICE_LOCAL_MEM_SIZE,
810         sizeof(prop.local_mem_size), &prop.local_mem_size, NULL
811     );
812     if (ciErrNum != CL_SUCCESS) {
813         snprintf(buf, sizeof(buf),
814             "clGetDeviceInfo failed to get local memory size for device %d",
815             (int)device_index
816         );
817         warnings.push_back(buf);
818         return ciErrNum;
819     }
820 
821     ciErrNum = (*p_clGetDeviceInfo)(
822         prop.device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY,
823         sizeof(prop.max_clock_frequency), &prop.max_clock_frequency, NULL
824     );
825     if (ciErrNum != CL_SUCCESS) {
826         snprintf(buf, sizeof(buf),
827             "clGetDeviceInfo failed to get max clock frequency for device %d",
828             (int)device_index
829         );
830         warnings.push_back(buf);
831         return ciErrNum;
832     }
833 
834     ciErrNum = (*p_clGetDeviceInfo)(
835         prop.device_id, CL_DEVICE_MAX_COMPUTE_UNITS,
836         sizeof(prop.max_compute_units), &prop.max_compute_units, NULL
837     );
838     if (ciErrNum != CL_SUCCESS) {
839         snprintf(buf, sizeof(buf),
840             "clGetDeviceInfo failed to get max compute units for device %d",
841             (int)device_index
842         );
843         warnings.push_back(buf);
844         return ciErrNum;
845     }
846 
847     ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VERSION, sizeof(prop.opencl_device_version), prop.opencl_device_version, NULL);
848     if (ciErrNum != CL_SUCCESS) {
849         snprintf(buf, sizeof(buf),
850             "clGetDeviceInfo failed to get OpenCL version supported by device %d",
851             (int)device_index
852         );
853         warnings.push_back(buf);
854         return ciErrNum;
855     }
856 
857     ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DRIVER_VERSION, sizeof(prop.opencl_driver_version), prop.opencl_driver_version, NULL);
858     if (ciErrNum != CL_SUCCESS) {
859         snprintf(buf, sizeof(buf),
860             "clGetDeviceInfo failed to get OpenCL driver version for device %d",
861             (int)device_index
862         );
863         warnings.push_back(buf);
864         return ciErrNum;
865     }
866 
867     // Nvidia Specific Extensions
868     if (strstr(prop.extensions, "cl_nv_device_attribute_query") != NULL) {
869 
870         ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(prop.nv_compute_capability_major), &prop.nv_compute_capability_major, NULL);
871         if (ciErrNum != CL_SUCCESS) {
872             snprintf(buf, sizeof(buf),
873                 "clGetDeviceInfo failed to get CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV for device %d",
874                 (int)device_index
875             );
876             warnings.push_back(buf);
877             return ciErrNum;
878         }
879 
880         ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(prop.nv_compute_capability_minor), &prop.nv_compute_capability_minor, NULL);
881         if (ciErrNum != CL_SUCCESS) {
882             snprintf(buf, sizeof(buf),
883                 "clGetDeviceInfo failed to get CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV for device %d",
884                 (int)device_index
885             );
886             warnings.push_back(buf);
887             return ciErrNum;
888         }
889 
890     }
891 
892     // AMD Specific Extensions
893     if (strstr(prop.extensions, "cl_amd_device_attribute_query") != NULL) {
894 
895         ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_BOARD_NAME_AMD, sizeof(buf), buf, NULL);
896         if (strlen(buf) && ciErrNum == CL_SUCCESS) {
897             safe_strcpy(prop.name, buf);
898         } else if (ciErrNum != CL_SUCCESS) {
899             snprintf(buf, sizeof(buf),
900                 "clGetDeviceInfo failed to get AMD Board Name for device %d",
901                 (int)device_index
902             );
903             warnings.push_back(buf);
904             return ciErrNum;
905         }
906 
907         ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, sizeof(prop.amd_simd_per_compute_unit), &prop.amd_simd_per_compute_unit, NULL);
908         if (ciErrNum != CL_SUCCESS) {
909             snprintf(buf, sizeof(buf),
910                 "clGetDeviceInfo failed to get CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD for device %d",
911                 (int)device_index
912             );
913             warnings.push_back(buf);
914             return ciErrNum;
915         }
916 
917         ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_WIDTH_AMD, sizeof(prop.amd_simd_width), &prop.amd_simd_width, NULL);
918         if (ciErrNum != CL_SUCCESS) {
919             snprintf(buf, sizeof(buf),
920                 "clGetDeviceInfo failed to get CL_DEVICE_SIMD_WIDTH_AMD for device %d",
921                 (int)device_index
922             );
923             warnings.push_back(buf);
924             return ciErrNum;
925         }
926 
927         ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(prop.amd_simd_instruction_width), &prop.amd_simd_instruction_width, NULL);
928         if (ciErrNum != CL_SUCCESS) {
929             snprintf(buf, sizeof(buf),
930                 "clGetDeviceInfo failed to get CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD for device %d",
931                 (int)device_index
932             );
933             warnings.push_back(buf);
934             return ciErrNum;
935         }
936 
937     }
938 
939     return CL_SUCCESS;
940 }
941 
942 // This is called for ATI GPUs with CAL or NVIDIA GPUs with CUDA, to merge
943 // the OpenCL info into the CAL or CUDA data for the "best" CAL or CUDA GPU.
944 // This assumes that, for each GPU, we have previously correlated its CAL
945 // or CUDA device_num with its opencl_device_index.
946 //
merge_opencl(vector<OPENCL_DEVICE_PROP> & opencls,vector<int> & ignore_dev)947 void COPROC::merge_opencl(
948     vector<OPENCL_DEVICE_PROP> &opencls,
949     vector<int>& ignore_dev
950 ) {
951     unsigned int i, j;
952 
953     for (i=0; i<opencls.size(); i++) {
954         opencls[i].is_used = COPROC_UNUSED;
955 
956         if (in_vector(opencls[i].device_num, ignore_dev)) {
957             opencls[i].is_used = COPROC_IGNORED;
958             continue;
959         }
960         if (device_num == opencls[i].device_num) {
961             opencl_prop = opencls[i];
962             opencl_device_ids[0] = opencls[i].device_id;
963             have_opencl = true;
964             break;
965         }
966     }
967 
968     opencl_device_count = 0;
969 
970     // Fill in info for other GPUs which CAL or CUDA found equivalent to best
971     //
972     for (i=0; i<(unsigned int)count; ++i) {
973         for (j=0; j<opencls.size(); j++) {
974             if (device_nums[i] == opencls[j].device_num) {
975                 opencls[j].is_used = COPROC_USED;
976                 opencl_device_indexes[opencl_device_count] = opencls[j].opencl_device_index;
977                 opencl_device_ids[opencl_device_count++] = opencls[j].device_id;
978                 instance_has_opencl[i] = true;
979             }
980         }
981     }
982 }
983 
984 // This is called for ATI GPUs without CAL or NVIDIA GPUs without CUDA
985 //
find_best_opencls(bool use_all,vector<OPENCL_DEVICE_PROP> & opencls,vector<int> & ignore_dev)986 void COPROC::find_best_opencls(
987     bool use_all,
988     vector<OPENCL_DEVICE_PROP> &opencls,
989     vector<int>& ignore_dev
990 ) {
991     unsigned int i;
992 
993     // identify the most capable ATI, NVIDIA or Intel OpenCL GPU
994     //
995     bool first = true;
996     for (i=0; i<opencls.size(); i++) {
997         if (in_vector(opencls[i].device_num, ignore_dev)) {
998             opencls[i].is_used = COPROC_IGNORED;
999             continue;
1000         }
1001         bool is_best = false;
1002         if (first) {
1003             is_best = true;
1004             first = false;
1005         } else if (opencl_compare(opencls[i], opencl_prop, false) > 0) {
1006             is_best = true;
1007         }
1008         if (is_best) {
1009             // fill in what info we have
1010             opencl_prop = opencls[i];
1011             device_num = opencls[i].device_num;
1012             peak_flops = opencls[i].peak_flops;
1013             have_opencl = true;
1014         }
1015     }
1016 
1017     // see which other instances are equivalent, and set the count,
1018     // device_nums, opencl_device_count and opencl_device_ids fields
1019     //
1020     count = 0;
1021     opencl_device_count = 0;
1022     for (i=0; i<opencls.size(); i++) {
1023         if (in_vector(opencls[i].device_num, ignore_dev)) {
1024             opencls[i].is_used = COPROC_IGNORED;
1025             continue;
1026         }
1027         if (use_all || !opencl_compare(opencls[i], opencl_prop, true)) {
1028             instance_has_opencl[count] = true;
1029             device_nums[count++] = opencls[i].device_num;
1030             opencl_device_indexes[opencl_device_count] = opencls[i].opencl_device_index;
1031             opencl_device_ids[opencl_device_count++] = opencls[i].device_id;
1032             opencls[i].is_used = COPROC_USED;
1033         }
1034     }
1035 }
1036 
fake_opencl_gpu(char * type)1037 void fake_opencl_gpu(char* type) {
1038     OPENCL_DEVICE_PROP op;
1039     op.clear();
1040     strcpy(op.name, type);
1041     strcpy(op.vendor, "ARM");
1042     op.vendor_id = 102760464;
1043     op.available = 1;
1044     op.half_fp_config = 63;
1045     op.single_fp_config = 63;
1046     op.double_fp_config = 63;
1047     op.endian_little = 1;
1048     op.execution_capabilities = 1;
1049     strcpy(op.extensions, "cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_byte_addressable_store cl_khr_3d_image_writes cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics cl_khr_fp16 cl_khr_gl_sharing cl_khr_icd cl_khr_egl_event cl_khr_egl_image cl_khr_image2d_from_buffer cl_arm_core_id cl_arm_printf cl_arm_thread_limit_hint cl_arm_non_uniform_work_group_size cl_arm_import_memory");
1050     op.global_mem_size = 2086998016;
1051     op.local_mem_size = 32768;
1052     op.max_clock_frequency = 600;
1053     op.max_compute_units = 2;
1054     strcpy(op.opencl_platform_version, "OpenCL 1.2 v1.r14p0-01rel0.0fe2d25ca074016740f8ab3fb451b151");
1055     strcpy(op.opencl_device_version,   "OpenCL 1.2 v1.r14p0-01rel0.0fe2d25ca074016740f8ab3fb451b151");
1056     strcpy(op.opencl_driver_version, "1.2");
1057     op.is_used = COPROC_USED;
1058     other_opencls.push_back(op);
1059 }
1060 
1061 #ifdef __APPLE__
1062 // OpenCL returns incorrect total RAM size for some
1063 // ATI GPUs so we get that info from OpenGL on Macs
1064 
1065 #include <OpenGL/OpenGL.h>
1066 #include <OpenGL/gl.h>
1067 #include <OpenGL/glu.h>
1068 #include <Carbon/Carbon.h>
1069 #include <IOKit/graphics/IOGraphicsLib.h>
1070 
1071 static io_service_t IOServicePortFromCGDisplayID(CGDirectDisplayID displayID);
1072 
opencl_get_ati_mem_size_from_opengl(vector<string> & warnings)1073 void COPROCS::opencl_get_ati_mem_size_from_opengl(vector<string>& warnings) {
1074     CGLRendererInfoObj info;
1075     long i, j;
1076     GLint numRenderers = 0, rv = 0, deviceVRAM, rendererID;
1077     cl_ulong deviceMemSize;
1078     CGLError theErr2 = kCGLNoError;
1079     CGLContextObj curr_ctx = CGLGetCurrentContext (); // save current CGL context
1080     int ati_gpu_index = 0;
1081     GLint rendererIDs[32];
1082     CFDataRef modelName[32];
1083     char opencl_name[256], iokit_name[256], buf[256];
1084     char *p;
1085 
1086     if (log_flags.coproc_debug) {
1087 
1088         for (i=0; i<32; ++i) {
1089             rendererIDs[i] = 0;
1090             modelName[i] = NULL;
1091 
1092             CGOpenGLDisplayMask myMask = 1 << i;
1093             CGDirectDisplayID displayID = CGOpenGLDisplayMaskToDisplayID(myMask);
1094             theErr2 = CGLQueryRendererInfo(myMask, &info, &numRenderers);
1095             if ((displayID != kCGNullDirectDisplay) && (theErr2 == kCGLNoError)) {
1096                 // Get the I/O Kit service port for the display
1097 //                io_registry_entry_t dspPort = CGDisplayIOServicePort(displayID);  // Deprecated in OS 10.9
1098                 io_registry_entry_t dspPort = IOServicePortFromCGDisplayID(displayID);
1099 
1100                 for (j = 0; j < numRenderers; j++) {
1101                     // find accelerated renderer (assume only one)
1102                     CGLDescribeRenderer (info, j, kCGLRPAcceleratedCompute, &rv);
1103                     if (true == rv) { // if openCL-capable
1104                         // what is the renderer ID
1105                         CGLDescribeRenderer (info, j, kCGLRPRendererID, &rendererIDs[i]);
1106                         modelName[i] = (CFDataRef)IORegistryEntrySearchCFProperty(
1107                             dspPort,
1108                             kIOServicePlane, CFSTR("model"), kCFAllocatorDefault,
1109                             kIORegistryIterateRecursively | kIORegistryIterateParents
1110                         );
1111                     }
1112                     if (modelName[i] != NULL) break;
1113                 }
1114             }
1115         }
1116     }   // End if (log_flags.coproc_debug) {
1117 
1118     theErr2 = CGLQueryRendererInfo( 0xffffffff, &info, &numRenderers);
1119     if (theErr2 == kCGLNoError) {
1120         CGLDescribeRenderer (info, 0, kCGLRPRendererCount, &numRenderers);
1121         for (i = 0; i < numRenderers; i++) {
1122             if (ati_gpu_index >= (int)ati_opencls.size()) {
1123                 break;
1124             }
1125 
1126             CGLDescribeRenderer (info, i, kCGLRPAcceleratedCompute, &rv);
1127             if (true == rv) { // if openCL-capable
1128                 // what is the renderer ID
1129                 CGLDescribeRenderer (info, i, kCGLRPRendererID, &rendererID);
1130                 // what is the VRAM?
1131                 CGLError notAvail = CGLDescribeRenderer (info, i, kCGLRPVideoMemoryMegabytes, &deviceVRAM);
1132                 if (notAvail == kCGLNoError) {
1133                     deviceMemSize = ((cl_ulong)deviceVRAM) * (1024L*1024L);
1134                 } else {	// kCGLRPVideoMemoryMegabytes is not available before OS 10.7
1135 #pragma clang diagnostic push
1136 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
1137                     // kCGLRPVideoMemory=120 is deprecated in OS 10.7 and may not be
1138                     // defined in later SDKs, so use a literal value here instead
1139                     // CGLDescribeRenderer (info, i, kCGLRPVideoMemory, &deviceVRAM);
1140                     CGLDescribeRenderer (info, i, (CGLRendererProperty)120, &deviceVRAM);
1141                     deviceMemSize = deviceVRAM;
1142 #pragma clang diagnostic pop
1143                 }
1144 
1145                 // build context and context specific info
1146                 CGLPixelFormatAttribute attribs[] = {
1147                     kCGLPFARendererID,
1148                     (CGLPixelFormatAttribute)rendererID,
1149                     kCGLPFAAllowOfflineRenderers,
1150                     (CGLPixelFormatAttribute)0
1151                 };
1152                 CGLPixelFormatObj pixelFormat = NULL;
1153                 GLint numPixelFormats = 0;
1154                 CGLContextObj cglContext;
1155 
1156                 CGLChoosePixelFormat (attribs, &pixelFormat, &numPixelFormats);
1157                 if (pixelFormat) {
1158                     CGLCreateContext(pixelFormat, NULL, &cglContext);
1159                     CGLDestroyPixelFormat (pixelFormat);
1160                     CGLSetCurrentContext (cglContext);
1161                     if (cglContext) {
1162                        // get vendor string from renderer
1163                         const GLubyte * strVend = glGetString (GL_VENDOR);
1164                         if (is_AMD((char *)strVend)) {
1165                             ati_opencls[ati_gpu_index].global_mem_size = deviceMemSize;
1166                             ati_opencls[ati_gpu_index].opencl_available_ram = deviceMemSize;
1167 
1168                             if (log_flags.coproc_debug) {
1169                                 // For some GPUs, one API returns "ATI" but the other API returns
1170                                 // "AMD" in the model name, so we normalize both to "AMD"
1171                                 strlcpy(opencl_name, ati_opencls[ati_gpu_index].name, sizeof(opencl_name));
1172                                 if ((p = strstr(opencl_name, "ATI")) != NULL) {
1173                                     *++p='M';
1174                                     *++p='D';
1175                                 }
1176 
1177                                 for (j=0; j<32; j++) {
1178                                     if ((rendererID == rendererIDs[j]) && (modelName[j] != NULL)) {
1179                                         break;
1180                                     }
1181                                 }
1182                                 if (j < 32) {
1183                                     strlcpy(iokit_name, (char *)CFDataGetBytePtr(modelName[j]), sizeof(iokit_name));
1184                                     if ((p = strstr(iokit_name, "ATI")) != NULL) {
1185                                         *++p='M';
1186                                         *++p='D';
1187                                     }
1188                                     if (strcmp(iokit_name, opencl_name)) {
1189                                         snprintf(buf, sizeof(buf),
1190                                             "opencl_get_ati_mem_size_from_opengl model name mismatch: %s vs %s\n",
1191                                             ati_opencls[ati_gpu_index].name, (char *)CFDataGetBytePtr(modelName[j])
1192                                         );
1193                                         warnings.push_back(buf);
1194                                     }
1195                                 } else {
1196                                     // Could not get model name from IOKit, so use renderer name
1197                                     const GLubyte * strRend = glGetString (GL_RENDERER);
1198                                     if (strRend != NULL) {
1199                                         strlcpy(iokit_name, (char *)strRend, sizeof(iokit_name));
1200                                         if ((p = strstr(iokit_name, "ATI")) != NULL) {
1201                                             *++p='M';
1202                                             *++p='D';
1203                                         }
1204                                     }
1205 
1206                                     if ((strRend == NULL) ||
1207                                         (!strstr(iokit_name, opencl_name))) {
1208                                             snprintf(buf, sizeof(buf),
1209                                             "opencl_get_ati_mem_size_from_opengl model name to renderer mismatch: %s vs %s\n",
1210                                             strRend, ati_opencls[ati_gpu_index].name
1211                                         );
1212                                         warnings.push_back(buf);
1213                                     }
1214                                 }
1215                             }   // End if (log_flags.coproc_debug) {
1216 
1217                             ati_gpu_index++;
1218                         } // End if ATI / AMD GPU
1219 
1220                         CGLDestroyContext (cglContext);
1221                     } else {
1222                         warnings.push_back(
1223                             "opencl_get_ati_mem_size_from_opengl failed to create context\n"
1224                         );
1225                     }
1226                 } else {
1227                     warnings.push_back(
1228                         "opencl_get_ati_mem_size_from_opengl failed to create PixelFormat\n"
1229                     );
1230                 }
1231             }       // End if kCGLRPAcceleratedCompute attribute
1232         }   // End loop: for (i = 0; i < numRenderers; i++)
1233         CGLDestroyRendererInfo (info);
1234     }
1235 
1236     if (log_flags.coproc_debug) {
1237         for (j=0; j<32; j++) {
1238             if (modelName[j] != NULL) {
1239                 CFRelease(modelName[j]);
1240             }
1241         }
1242     }
1243     CGLSetCurrentContext (curr_ctx); // restore current CGL context
1244 }
1245 
1246 
1247 
1248 // The following replaces CGDisplayIOServicePort which is deprecated in OS 10.9
1249 //
1250 //========================================================================
1251 // GLFW 3.1 OS X - www.glfw.org
1252 //------------------------------------------------------------------------
1253 // Copyright (c) 2002-2006 Marcus Geelnard
1254 // Copyright (c) 2006-2010 Camilla Berglund <elmindreda@elmindreda.org>
1255 //
1256 // This software is provided 'as-is', without any express or implied
1257 // warranty. In no event will the authors be held liable for any damages
1258 // arising from the use of this software.
1259 //
1260 // Permission is granted to anyone to use this software for any purpose,
1261 // including commercial applications, and to alter it and redistribute it
1262 // freely, subject to the following restrictions:
1263 //
1264 // 1. The origin of this software must not be misrepresented; you must not
1265 //    claim that you wrote the original software. If you use this software
1266 //    in a product, an acknowledgment in the product documentation would
1267 //    be appreciated but is not required.
1268 //
1269 // 2. Altered source versions must be plainly marked as such, and must not
1270 //    be misrepresented as being the original software.
1271 //
1272 // 3. This notice may not be removed or altered from any source
1273 //    distribution.
1274 //
1275 //========================================================================
1276 
1277 // Returns the io_service_t corresponding to a CG display ID, or 0 on failure.
1278 // The io_service_t should be released with IOObjectRelease when not needed.
1279 //
1280 
IOServicePortFromCGDisplayID(CGDirectDisplayID displayID)1281 static io_service_t IOServicePortFromCGDisplayID(CGDirectDisplayID displayID)
1282 {
1283     io_iterator_t iter;
1284     io_service_t serv, servicePort = 0;
1285 
1286     CFMutableDictionaryRef matching = IOServiceMatching("IODisplayConnect");
1287 
1288     // releases matching for us
1289     kern_return_t err = IOServiceGetMatchingServices(kIOMasterPortDefault,
1290                                                      matching,
1291                                                      &iter);
1292     if (err)
1293         return 0;
1294 
1295     while ((serv = IOIteratorNext(iter)) != 0)
1296     {
1297         CFDictionaryRef info;
1298         CFIndex vendorID, productID, serialNumber;
1299         CFNumberRef vendorIDRef, productIDRef, serialNumberRef;
1300         Boolean success;
1301 
1302         info = IODisplayCreateInfoDictionary(serv,
1303                                              kIODisplayOnlyPreferredName);
1304 
1305         vendorIDRef = (CFNumberRef)CFDictionaryGetValue(info,
1306                                            CFSTR(kDisplayVendorID));
1307         productIDRef = (CFNumberRef)CFDictionaryGetValue(info,
1308                                             CFSTR(kDisplayProductID));
1309         serialNumberRef = (CFNumberRef)CFDictionaryGetValue(info,
1310                                                CFSTR(kDisplaySerialNumber));
1311 
1312         success = CFNumberGetValue(vendorIDRef, kCFNumberCFIndexType,
1313                                    &vendorID);
1314         success &= CFNumberGetValue(productIDRef, kCFNumberCFIndexType,
1315                                     &productID);
1316         success &= CFNumberGetValue(serialNumberRef, kCFNumberCFIndexType,
1317                                     &serialNumber);
1318 
1319         if (!success)
1320         {
1321             CFRelease(info);
1322             continue;
1323         }
1324         // If the vendor and product id along with the serial don't match
1325         // then we are not looking at the correct monitor.
1326         // NOTE: The serial number is important in cases where two monitors
1327         //       are the exact same.
1328         if (CGDisplayVendorNumber(displayID) != vendorID  ||
1329             CGDisplayModelNumber(displayID) != productID  ||
1330             CGDisplaySerialNumber(displayID) != serialNumber)
1331         {
1332             CFRelease(info);
1333             continue;
1334         }
1335 
1336         // The VendorID, Product ID, and the Serial Number all Match Up!
1337         // Therefore we have found the appropriate display io_service
1338         servicePort = serv;
1339         CFRelease(info);
1340         break;
1341     }
1342 
1343     IOObjectRelease(iter);
1344     return servicePort;
1345 }
1346 #endif// __APPLE__
1347