1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <config_folders.h>
11 
12 #include <opencl_device.hxx>
13 #include <opencl_device_selection.h>
14 
15 #include <opencl/openclconfig.hxx>
16 #include <opencl/openclwrapper.hxx>
17 #include <opencl/platforminfo.hxx>
18 #include <osl/file.hxx>
19 #include <rtl/bootstrap.hxx>
20 #include <rtl/digest.h>
21 #include <rtl/strbuf.hxx>
22 #include <rtl/ustring.hxx>
23 #include <sal/config.h>
24 #include <sal/log.hxx>
25 #include <opencl/OpenCLZone.hxx>
26 
27 #include <memory>
28 
29 #include <stdlib.h>
30 
31 #include <officecfg/Office/Common.hxx>
32 
33 #ifdef _WIN32
34 #include <prewin.h>
35 #include <postwin.h>
36 #define OPENCL_DLL_NAME "OpenCL.dll"
37 #elif defined(MACOSX)
38 #define OPENCL_DLL_NAME nullptr
39 #else
40 #define OPENCL_DLL_NAME "libOpenCL.so.1"
41 #endif
42 
43 #ifdef _WIN32_WINNT_WINBLUE
44 #include <VersionHelpers.h>
45 #endif
46 
47 #define DEVICE_NAME_LENGTH 1024
48 #define DRIVER_VERSION_LENGTH 1024
49 #define PLATFORM_VERSION_LENGTH 1024
50 
// Error-check helper for OpenCL calls made inside functions returning bool:
// when `status` is not CL_SUCCESS it logs a warning containing the status code
// and source location, then makes the *enclosing function* return false.
// `name` is pasted next to a string literal, so it must itself be a string
// literal (typically the name of the OpenCL function that was called).
#define CHECK_OPENCL(status,name) \
if( status != CL_SUCCESS )  \
{ \
    SAL_WARN( "opencl", "OpenCL error code " << status << " at " SAL_DETAIL_WHERE "from " name ); \
    return false; \
}
57 
58 using namespace std;
59 
namespace {

// Set to true by initOpenCLRunEnv(int) once the run environment has been set
// up, and reset to false by releaseOpenCLEnv().
bool bIsInited = false;

}
65 
66 namespace openclwrapper {
67 
// The single, file-wide OpenCL state (platform, device, context, command
// queues, programs) that the functions in this file read and update.
GPUEnv gpuEnv;
// Not modified anywhere in this file.
// NOTE(review): presumably a counter of failed kernel runs maintained by
// callers elsewhere - confirm against its users.
sal_uInt64 kernelFailures = 0;
70 
71 namespace
72 {
73 
generateMD5(const void * pData,size_t length)74 OString generateMD5(const void* pData, size_t length)
75 {
76     sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
77     rtlDigestError aError = rtl_digest_MD5(pData, length,
78             pBuffer, RTL_DIGEST_LENGTH_MD5);
79     SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed");
80 
81     OStringBuffer aBuffer;
82     const char* const pString = "0123456789ABCDEF";
83     for(sal_uInt8 val : pBuffer)
84     {
85         aBuffer.append(pString[val/16]);
86         aBuffer.append(pString[val%16]);
87     }
88     return aBuffer.makeStringAndClear();
89 }
90 
getCacheFolder()91 OString const & getCacheFolder()
92 {
93     static OString const aCacheFolder = [&]()
94     {
95         OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
96         rtl::Bootstrap::expandMacros(url);
97 
98         osl::Directory::create(url);
99 
100         return OUStringToOString(url, RTL_TEXTENCODING_UTF8);
101     }();
102     return aCacheFolder;
103 }
104 
105 }
106 
initializeCommandQueue(GPUEnv & aGpuEnv)107 static bool initializeCommandQueue(GPUEnv& aGpuEnv)
108 {
109     OpenCLZone zone;
110 
111     cl_int nState;
112     cl_command_queue command_queue[OPENCL_CMDQUEUE_SIZE];
113 
114     for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
115     {
116         command_queue[i] = clCreateCommandQueue(aGpuEnv.mpContext, aGpuEnv.mpDevID, 0, &nState);
117         if (nState != CL_SUCCESS)
118             SAL_WARN("opencl", "clCreateCommandQueue failed: " << errorString(nState));
119 
120         if (command_queue[i] == nullptr || nState != CL_SUCCESS)
121         {
122             // Release all command queues created so far.
123             for (int j = 0; j <= i; ++j)
124             {
125                 if (command_queue[j])
126                 {
127                     clReleaseCommandQueue(command_queue[j]);
128                     command_queue[j] = nullptr;
129                 }
130             }
131 
132             clReleaseContext(aGpuEnv.mpContext);
133             SAL_WARN("opencl", "failed to set/switch opencl device");
134             return false;
135         }
136 
137         SAL_INFO("opencl", "Created command queue " << command_queue[i] << " for context " << aGpuEnv.mpContext);
138     }
139 
140     for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
141     {
142         aGpuEnv.mpCmdQueue[i] = command_queue[i];
143     }
144     aGpuEnv.mbCommandQueueInitialized = true;
145     return true;
146 }
147 
setKernelEnv(KernelEnv * envInfo)148 void setKernelEnv( KernelEnv *envInfo )
149 {
150     if (!gpuEnv.mbCommandQueueInitialized)
151     {
152         initializeCommandQueue(gpuEnv);
153     }
154 
155     envInfo->mpkContext = gpuEnv.mpContext;
156     envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
157 
158     assert(gpuEnv.mnCmdQueuePos < OPENCL_CMDQUEUE_SIZE);
159     envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue[gpuEnv.mnCmdQueuePos];
160 }
161 
162 namespace {
163 
createFileName(cl_device_id deviceId,const char * clFileName)164 OString createFileName(cl_device_id deviceId, const char* clFileName)
165 {
166     OString fileName(clFileName);
167     sal_Int32 nIndex = fileName.lastIndexOf(".cl");
168     if(nIndex > 0)
169         fileName = fileName.copy(0, nIndex);
170 
171     char deviceName[DEVICE_NAME_LENGTH] = {0};
172     clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
173             sizeof(deviceName), deviceName, nullptr);
174 
175     char driverVersion[DRIVER_VERSION_LENGTH] = {0};
176     clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
177             sizeof(driverVersion), driverVersion, nullptr);
178 
179     cl_platform_id platformId;
180     clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
181             sizeof(platformId), &platformId, nullptr);
182 
183     char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
184     clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
185             platformVersion, nullptr);
186 
187     // create hash for deviceName + driver version + platform version
188     OString aString = rtl::OStringView(deviceName) + driverVersion + platformVersion;
189     OString aHash = generateMD5(aString.getStr(), aString.getLength());
190 
191     return getCacheFolder() + fileName + "-" + aHash + ".bin";
192 }
193 
binaryGenerated(const char * clFileName,cl_context context)194 std::vector<std::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context )
195 {
196     size_t numDevices=0;
197 
198     std::vector<std::shared_ptr<osl::File> > aGeneratedFiles;
199     cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
200             0, nullptr, &numDevices );
201     numDevices /= sizeof(numDevices);
202 
203     if(clStatus != CL_SUCCESS)
204         return aGeneratedFiles;
205 
206     assert(numDevices == 1);
207 
208     // grab the handle to the device in the context.
209     cl_device_id pDevID;
210     clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
211             sizeof( cl_device_id ), &pDevID, nullptr );
212 
213     if(clStatus != CL_SUCCESS)
214         return aGeneratedFiles;
215 
216     assert(pDevID == gpuEnv.mpDevID);
217 
218     OString fileName = createFileName(gpuEnv.mpDevID, clFileName);
219     auto pNewFile = std::make_shared<osl::File>(OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
220     if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
221     {
222         aGeneratedFiles.push_back(pNewFile);
223         SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success");
224     }
225     else
226     {
227         SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
228     }
229 
230     return aGeneratedFiles;
231 }
232 
writeBinaryToFile(const OString & rFileName,const char * binary,size_t numBytes)233 bool writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
234 {
235     osl::File file(OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
236     osl::FileBase::RC status = file.open(
237             osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
238 
239     if(status != osl::FileBase::E_None)
240         return false;
241 
242     sal_uInt64 nBytesWritten = 0;
243     file.write( binary, numBytes, nBytesWritten );
244 
245     assert(numBytes == nBytesWritten);
246 
247     return true;
248 }
249 
250 }
251 
generatBinFromKernelSource(cl_program program,const char * clFileName)252 bool generatBinFromKernelSource( cl_program program, const char * clFileName )
253 {
254     cl_uint numDevices;
255 
256     cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
257                    sizeof(numDevices), &numDevices, nullptr );
258     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
259 
260     assert(numDevices == 1);
261 
262     cl_device_id pDevID;
263     /* grab the handle to the device in the program. */
264     clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
265                    sizeof(cl_device_id), &pDevID, nullptr );
266     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
267 
268     /* figure out the size of the binary. */
269     size_t binarySize;
270 
271     clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
272                    sizeof(size_t), &binarySize, nullptr );
273     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
274 
275     /* copy over the generated binary. */
276     if ( binarySize != 0 )
277     {
278         std::unique_ptr<char[]> binary(new char[binarySize]);
279         clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
280                                      sizeof(char *), &binary, nullptr );
281         CHECK_OPENCL(clStatus,"clGetProgramInfo");
282 
283         OString fileName = createFileName(pDevID, clFileName);
284         if ( !writeBinaryToFile( fileName,
285                                  binary.get(), binarySize ) )
286             SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL");
287         else
288             SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success");
289     }
290     return true;
291 }
292 
293 namespace {
294 
// Plain bundle of OpenCL handles. switchOpenCLDevice() fills in the platform,
// context and device and passes the struct to initOpenCLAttr(), which copies
// those three members into the global gpuEnv.
struct OpenCLEnv
{
    cl_platform_id mpOclPlatformID; // platform the selected device belongs to
    cl_context mpOclContext; // context created for the selected device
    cl_device_id mpOclDevsID; // the selected device
    cl_command_queue mpOclCmdQueue[OPENCL_CMDQUEUE_SIZE]; // neither read nor written in this file
};
302 
initOpenCLAttr(OpenCLEnv * env)303 bool initOpenCLAttr( OpenCLEnv * env )
304 {
305     if ( gpuEnv.mnIsUserCreated )
306         return true;
307 
308     gpuEnv.mpContext = env->mpOclContext;
309     gpuEnv.mpPlatformID = env->mpOclPlatformID;
310     gpuEnv.mpDevID = env->mpOclDevsID;
311 
312     gpuEnv.mnIsUserCreated = 1;
313 
314     gpuEnv.mbCommandQueueInitialized = false;
315 
316     gpuEnv.mnCmdQueuePos = 0; // default to 0.
317 
318     return false;
319 }
320 
buildProgram(const char * buildOption,GPUEnv * gpuInfo,int idx)321 bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
322 {
323     cl_int clStatus;
324     //char options[512];
325     // create a cl program executable for all the devices specified
326     clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &gpuInfo->mpDevID,
327                               buildOption, nullptr, nullptr);
328 
329     if ( clStatus != CL_SUCCESS )
330     {
331         size_t length;
332         clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
333                                           CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
334         if ( clStatus != CL_SUCCESS )
335         {
336             return false;
337         }
338 
339         std::unique_ptr<char[]> buildLog(new char[length]);
340         clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
341                                           CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
342         if ( clStatus != CL_SUCCESS )
343         {
344             return false;
345         }
346 
347         OString aBuildLogFileURL = getCacheFolder() + "kernel-build.log";
348         osl::File aBuildLogFile(OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
349         osl::FileBase::RC status = aBuildLogFile.open(
350                 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
351 
352         if(status != osl::FileBase::E_None)
353             return false;
354 
355         sal_uInt64 nBytesWritten = 0;
356         aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
357 
358         return false;
359     }
360 
361     return true;
362 }
363 
364 }
365 
buildProgramFromBinary(const char * buildOption,GPUEnv * gpuInfo,const char * filename,int idx)366 bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
367 {
368     size_t numDevices;
369     cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
370             0, nullptr, &numDevices );
371     numDevices /= sizeof(numDevices);
372     CHECK_OPENCL( clStatus, "clGetContextInfo" );
373 
374     std::vector<std::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
375             filename, gpuInfo->mpContext );
376 
377     if (aGeneratedFiles.size() == numDevices)
378     {
379         std::unique_ptr<size_t[]> length(new size_t[numDevices]);
380         std::unique_ptr<unsigned char*[]> pBinary(new unsigned char*[numDevices]);
381         for(size_t i = 0; i < numDevices; ++i)
382         {
383             sal_uInt64 nSize;
384             aGeneratedFiles[i]->getSize(nSize);
385             unsigned char* binary = new unsigned char[nSize];
386             sal_uInt64 nBytesRead;
387             aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
388             if(nSize != nBytesRead)
389                 assert(false);
390 
391             length[i] = nBytesRead;
392 
393             pBinary[i] = binary;
394         }
395 
396         // grab the handles to all of the devices in the context.
397         std::unique_ptr<cl_device_id[]> pArryDevsID(new cl_device_id[numDevices]);
398         clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
399                        sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), nullptr );
400 
401         if(clStatus != CL_SUCCESS)
402         {
403             for(size_t i = 0; i < numDevices; ++i)
404             {
405                 delete[] pBinary[i];
406             }
407             return false;
408         }
409 
410         cl_int binary_status;
411 
412         gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
413                                            pArryDevsID.get(), length.get(), const_cast<const unsigned char**>(pBinary.get()),
414                                            &binary_status, &clStatus );
415         if(clStatus != CL_SUCCESS)
416         {
417             // something went wrong, fall back to compiling from source
418             return false;
419         }
420         SAL_INFO("opencl", "Created program " << gpuInfo->mpArryPrograms[idx] << " from binary");
421         for(size_t i = 0; i < numDevices; ++i)
422         {
423             delete[] pBinary[i];
424         }
425     }
426 
427     if ( !gpuInfo->mpArryPrograms[idx] )
428     {
429         return false;
430     }
431     return buildProgram(buildOption, gpuInfo, idx);
432 }
433 
434 namespace {
435 
checkDeviceForDoubleSupport(cl_device_id deviceId,bool & bKhrFp64,bool & bAmdFp64)436 void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
437 {
438     OpenCLZone zone;
439 
440     bKhrFp64 = false;
441     bAmdFp64 = false;
442 
443     // Check device extensions for double type
444     size_t aDevExtInfoSize = 0;
445 
446     cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize );
447     if( clStatus != CL_SUCCESS )
448         return;
449 
450     std::unique_ptr<char[]> pExtInfo(new char[aDevExtInfoSize]);
451 
452     clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
453                    sizeof(char) * aDevExtInfoSize, pExtInfo.get(), nullptr);
454 
455     if( clStatus != CL_SUCCESS )
456         return;
457 
458     if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
459     {
460         bKhrFp64 = true;
461     }
462     else
463     {
464         // Check if cl_amd_fp64 extension is supported
465         if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
466             bAmdFp64 = true;
467     }
468 }
469 
initOpenCLRunEnv(GPUEnv * gpuInfo)470 bool initOpenCLRunEnv( GPUEnv *gpuInfo )
471 {
472     OpenCLZone zone;
473     cl_uint nPreferredVectorWidthFloat;
474     char pName[64];
475 
476     bool bKhrFp64 = false;
477     bool bAmdFp64 = false;
478 
479     checkDeviceForDoubleSupport(gpuInfo->mpDevID, bKhrFp64, bAmdFp64);
480 
481     gpuInfo->mnKhrFp64Flag = bKhrFp64;
482     gpuInfo->mnAmdFp64Flag = bAmdFp64;
483 
484     gpuInfo->mbNeedsTDRAvoidance = false;
485 
486     clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint),
487                     &nPreferredVectorWidthFloat, nullptr);
488     SAL_INFO("opencl", "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT=" << nPreferredVectorWidthFloat);
489 
490     clGetPlatformInfo(gpuInfo->mpPlatformID, CL_PLATFORM_NAME, 64,
491              pName, nullptr);
492 
493 #if defined (_WIN32)
494 // the Win32 SDK 8.1 deprecates GetVersionEx()
495 # ifdef _WIN32_WINNT_WINBLUE
496     const bool bIsNotWinOrIsWin8OrGreater = IsWindows8OrGreater();
497 # else
498     bool bIsNotWinOrIsWin8OrGreater = true;
499     OSVERSIONINFOW aVersionInfo = {};
500     aVersionInfo.dwOSVersionInfoSize = sizeof( aVersionInfo );
501     if (GetVersionExW( &aVersionInfo ))
502     {
503         // Windows 7 or lower?
504         if (aVersionInfo.dwMajorVersion < 6 ||
505            (aVersionInfo.dwMajorVersion == 6 && aVersionInfo.dwMinorVersion < 2))
506             bIsNotWinOrIsWin8OrGreater = false;
507     }
508 # endif
509 #else
510     const bool bIsNotWinOrIsWin8OrGreater = true;
511 #endif
512 
513     // Heuristic: Certain old low-end OpenCL implementations don't
514     // work for us with too large group lengths. Looking at the preferred
515     // float vector width seems to be a way to detect these devices, except
516     // the non-working NVIDIA cards on Windows older than version 8.
517     gpuInfo->mbNeedsTDRAvoidance = ( nPreferredVectorWidthFloat == 4 ) ||
518         ( !bIsNotWinOrIsWin8OrGreater &&
519           OUString::createFromAscii(pName).indexOf("NVIDIA") > -1 );
520 
521     size_t nMaxParameterSize;
522     clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t),
523                     &nMaxParameterSize, nullptr);
524     SAL_INFO("opencl", "CL_DEVICE_MAX_PARAMETER_SIZE=" << nMaxParameterSize);
525 
526     return false;
527 }
528 
initOpenCLRunEnv(int argc)529 bool initOpenCLRunEnv( int argc )
530 {
531     if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
532         return true;
533 
534     if ( !bIsInited )
535     {
536         if ( !gpuEnv.mnIsUserCreated )
537             memset( &gpuEnv, 0, sizeof(gpuEnv) );
538 
539         //initialize devices, context, command_queue
540         bool status = initOpenCLRunEnv( &gpuEnv );
541         if ( status )
542         {
543             return true;
544         }
545         //initialize program, kernelName, kernelCount
546         if( getenv( "SC_FLOAT" ) )
547         {
548             gpuEnv.mnKhrFp64Flag = false;
549             gpuEnv.mnAmdFp64Flag = false;
550         }
551         if( gpuEnv.mnKhrFp64Flag )
552         {
553             SAL_INFO("opencl", "Use Khr double");
554         }
555         else if( gpuEnv.mnAmdFp64Flag )
556         {
557             SAL_INFO("opencl", "Use AMD double type");
558         }
559         else
560         {
561             SAL_INFO("opencl", "USE float type");
562         }
563         bIsInited = true;
564     }
565     return false;
566 }
567 
568 // based on crashes and hanging during kernel compilation
createDeviceInfo(cl_device_id aDeviceId,OpenCLPlatformInfo & rPlatformInfo)569 void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
570 {
571     OpenCLDeviceInfo aDeviceInfo;
572     aDeviceInfo.device = aDeviceId;
573 
574     char pName[DEVICE_NAME_LENGTH];
575     cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, nullptr);
576     if(nState != CL_SUCCESS)
577         return;
578 
579     aDeviceInfo.maName = OUString::createFromAscii(pName);
580 
581     char pVendor[DEVICE_NAME_LENGTH];
582     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, nullptr);
583     if(nState != CL_SUCCESS)
584         return;
585 
586     aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
587 
588     cl_ulong nMemSize;
589     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, nullptr);
590     if(nState != CL_SUCCESS)
591         return;
592 
593     aDeviceInfo.mnMemory = nMemSize;
594 
595     cl_uint nClockFrequency;
596     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, nullptr);
597     if(nState != CL_SUCCESS)
598         return;
599 
600     aDeviceInfo.mnFrequency = nClockFrequency;
601 
602     cl_uint nComputeUnits;
603     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, nullptr);
604     if(nState != CL_SUCCESS)
605         return;
606 
607     char pDriver[DEVICE_NAME_LENGTH];
608     nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, nullptr);
609 
610     if(nState != CL_SUCCESS)
611         return;
612 
613     aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
614 
615     bool bKhrFp64 = false;
616     bool bAmdFp64 = false;
617     checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
618 
619     // only list devices that support double
620     if(!bKhrFp64 && !bAmdFp64)
621         return;
622 
623     aDeviceInfo.mnComputeUnits = nComputeUnits;
624 
625     if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo))
626         rPlatformInfo.maDevices.push_back(aDeviceInfo);
627 }
628 
createPlatformInfo(cl_platform_id nPlatformId,OpenCLPlatformInfo & rPlatformInfo)629 bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
630 {
631     rPlatformInfo.platform = nPlatformId;
632     char pName[64];
633     cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
634              pName, nullptr);
635     if(nState != CL_SUCCESS)
636         return false;
637     rPlatformInfo.maName = OUString::createFromAscii(pName);
638 
639     char pVendor[64];
640     nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
641              pVendor, nullptr);
642     if(nState != CL_SUCCESS)
643         return false;
644 
645     rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
646 
647     cl_uint nDevices;
648     nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, nullptr, &nDevices);
649     if(nState != CL_SUCCESS)
650         return false;
651 
652     // memory leak that does not matter
653     // memory is stored in static variable that lives through the whole program
654     cl_device_id* pDevices = new cl_device_id[nDevices];
655     nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, nullptr);
656     if(nState != CL_SUCCESS)
657         return false;
658 
659     for(size_t i = 0; i < nDevices; ++i)
660     {
661         createDeviceInfo(pDevices[i], rPlatformInfo);
662     }
663 
664     return true;
665 }
666 
667 }
668 
fillOpenCLInfo()669 const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
670 {
671     static std::vector<OpenCLPlatformInfo> aPlatforms;
672 
673     // return early if we already initialized or can't use OpenCL
674     if (!aPlatforms.empty() || !canUseOpenCL())
675         return aPlatforms;
676 
677     int status = clewInit(OPENCL_DLL_NAME);
678     if (status < 0)
679         return aPlatforms;
680 
681     cl_uint nPlatforms;
682     cl_int nState = clGetPlatformIDs(0, nullptr, &nPlatforms);
683 
684     if(nState != CL_SUCCESS)
685         return aPlatforms;
686 
687     // memory leak that does not matter,
688     // memory is stored in static instance aPlatforms
689     cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
690     nState = clGetPlatformIDs(nPlatforms, pPlatforms, nullptr);
691 
692     if(nState != CL_SUCCESS)
693         return aPlatforms;
694 
695     for(size_t i = 0; i < nPlatforms; ++i)
696     {
697         OpenCLPlatformInfo aPlatformInfo;
698         if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
699             aPlatforms.push_back(aPlatformInfo);
700     }
701 
702     return aPlatforms;
703 }
704 
705 namespace {
706 
findDeviceIdByDeviceString(const OUString & rString,const std::vector<OpenCLPlatformInfo> & rPlatforms)707 cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
708 {
709     for (const OpenCLPlatformInfo& rPlatform : rPlatforms)
710     {
711         for (const OpenCLDeviceInfo& rDeviceInfo : rPlatform.maDevices)
712         {
713             OUString aDeviceId = rDeviceInfo.maVendor + " " + rDeviceInfo.maName;
714             if (rString == aDeviceId)
715             {
716                 return rDeviceInfo.device;
717             }
718         }
719     }
720 
721     return nullptr;
722 }
723 
findDeviceInfoFromDeviceId(cl_device_id aDeviceId,size_t & rDeviceId,size_t & rPlatformId)724 void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
725 {
726     cl_platform_id platformId;
727     cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
728             sizeof(platformId), &platformId, nullptr);
729 
730     if(nState != CL_SUCCESS)
731         return;
732 
733     const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
734     for(size_t i = 0; i < rPlatforms.size(); ++i)
735     {
736         cl_platform_id platId = rPlatforms[i].platform;
737         if(platId != platformId)
738             continue;
739 
740         for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
741         {
742             cl_device_id id = rPlatforms[i].maDevices[j].device;
743             if(id == aDeviceId)
744             {
745                 rDeviceId = j;
746                 rPlatformId = i;
747                 return;
748             }
749         }
750     }
751 }
752 
753 }
754 
canUseOpenCL()755 bool canUseOpenCL()
756 {
757     if( const char* env = getenv( "SC_FORCE_CALCULATION" ))
758     {
759         if( strcmp( env, "opencl" ) == 0 )
760             return true;
761     }
762     return !getenv("SAL_DISABLE_OPENCL") && officecfg::Office::Common::Misc::UseOpenCL::get();
763 }
764 
switchOpenCLDevice(const OUString * pDevice,bool bAutoSelect,bool bForceEvaluation,OUString & rOutSelectedDeviceVersionIDString)765 bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation, OUString& rOutSelectedDeviceVersionIDString)
766 {
767     if (!canUseOpenCL() || fillOpenCLInfo().empty())
768         return false;
769 
770     cl_device_id pDeviceId = nullptr;
771     if(pDevice)
772         pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
773 
774     if(!pDeviceId || bAutoSelect)
775     {
776         int status = clewInit(OPENCL_DLL_NAME);
777         if (status < 0)
778             return false;
779 
780         OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
781         rtl::Bootstrap::expandMacros(url);
782         OUString path;
783         osl::FileBase::getSystemPathFromFileURL(url,path);
784         ds_device aSelectedDevice = getDeviceSelection(path, bForceEvaluation);
785         if ( aSelectedDevice.eType != DeviceType::OpenCLDevice)
786             return false;
787         pDeviceId = aSelectedDevice.aDeviceID;
788     }
789 
790     if(gpuEnv.mpDevID == pDeviceId)
791     {
792         // we don't need to change anything
793         // still the same device
794         return pDeviceId != nullptr;
795     }
796 
797     cl_context context;
798     cl_platform_id platformId;
799 
800     {
801         OpenCLZone zone;
802         cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
803                                         sizeof(platformId), &platformId, nullptr);
804 
805         cl_context_properties cps[3];
806         cps[0] = CL_CONTEXT_PLATFORM;
807         cps[1] = reinterpret_cast<cl_context_properties>(platformId);
808         cps[2] = 0;
809         context = clCreateContext( cps, 1, &pDeviceId, nullptr, nullptr, &nState );
810         if (nState != CL_SUCCESS)
811             SAL_WARN("opencl", "clCreateContext failed: " << errorString(nState));
812 
813         if(nState != CL_SUCCESS || context == nullptr)
814         {
815             if(context != nullptr)
816                 clReleaseContext(context);
817 
818             SAL_WARN("opencl", "failed to set/switch opencl device");
819             return false;
820         }
821         SAL_INFO("opencl", "Created context " << context << " for platform " << platformId << ", device " << pDeviceId);
822 
823         OString sDeviceID = getDeviceInfoString(pDeviceId, CL_DEVICE_VENDOR) + " " + getDeviceInfoString(pDeviceId, CL_DRIVER_VERSION);
824         rOutSelectedDeviceVersionIDString = OStringToOUString(sDeviceID, RTL_TEXTENCODING_UTF8);
825     }
826 
827     setOpenCLCmdQueuePosition(0); // Call this just to avoid the method being deleted from unused function deleter.
828 
829     releaseOpenCLEnv(&gpuEnv);
830 
831     OpenCLEnv env;
832     env.mpOclPlatformID = platformId;
833     env.mpOclContext = context;
834     env.mpOclDevsID = pDeviceId;
835 
836     initOpenCLAttr(&env);
837 
838     return !initOpenCLRunEnv(0);
839 }
840 
getOpenCLDeviceInfo(size_t & rDeviceId,size_t & rPlatformId)841 void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
842 {
843     if (!canUseOpenCL())
844         return;
845 
846     int status = clewInit(OPENCL_DLL_NAME);
847     if (status < 0)
848         return;
849 
850     cl_device_id id = gpuEnv.mpDevID;
851     findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
852 }
853 
getOpenCLDeviceName(OUString & rDeviceName,OUString & rPlatformName)854 void getOpenCLDeviceName(OUString& rDeviceName, OUString& rPlatformName)
855 {
856     if (!canUseOpenCL())
857         return;
858 
859     int status = clewInit(OPENCL_DLL_NAME);
860     if (status < 0)
861         return;
862 
863     cl_device_id deviceId = gpuEnv.mpDevID;
864     cl_platform_id platformId;
865     if( clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM, sizeof(platformId), &platformId, nullptr) != CL_SUCCESS )
866         return;
867 
868     char deviceName[DEVICE_NAME_LENGTH] = {0};
869     if( clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr) != CL_SUCCESS )
870         return;
871     char platformName[64];
872     if( clGetPlatformInfo(platformId, CL_PLATFORM_NAME, 64, platformName, nullptr) != CL_SUCCESS )
873         return;
874     rDeviceName = OUString::createFromAscii(deviceName);
875     rPlatformName = OUString::createFromAscii(platformName);
876 }
877 
setOpenCLCmdQueuePosition(int nPos)878 void setOpenCLCmdQueuePosition( int nPos )
879 {
880     if (nPos < 0 || nPos >= OPENCL_CMDQUEUE_SIZE)
881         // Out of range. Ignore this.
882         return;
883 
884     gpuEnv.mnCmdQueuePos = nPos;
885 }
886 
errorString(cl_int nError)887 const char* errorString(cl_int nError)
888 {
889 #define CASE(val) case CL_##val: return #val
890     switch (nError)
891     {
892         CASE(SUCCESS);
893         CASE(DEVICE_NOT_FOUND);
894         CASE(DEVICE_NOT_AVAILABLE);
895         CASE(COMPILER_NOT_AVAILABLE);
896         CASE(MEM_OBJECT_ALLOCATION_FAILURE);
897         CASE(OUT_OF_RESOURCES);
898         CASE(OUT_OF_HOST_MEMORY);
899         CASE(PROFILING_INFO_NOT_AVAILABLE);
900         CASE(MEM_COPY_OVERLAP);
901         CASE(IMAGE_FORMAT_MISMATCH);
902         CASE(IMAGE_FORMAT_NOT_SUPPORTED);
903         CASE(BUILD_PROGRAM_FAILURE);
904         CASE(MAP_FAILURE);
905         CASE(INVALID_VALUE);
906         CASE(INVALID_DEVICE_TYPE);
907         CASE(INVALID_PLATFORM);
908         CASE(INVALID_DEVICE);
909         CASE(INVALID_CONTEXT);
910         CASE(INVALID_QUEUE_PROPERTIES);
911         CASE(INVALID_COMMAND_QUEUE);
912         CASE(INVALID_HOST_PTR);
913         CASE(INVALID_MEM_OBJECT);
914         CASE(INVALID_IMAGE_FORMAT_DESCRIPTOR);
915         CASE(INVALID_IMAGE_SIZE);
916         CASE(INVALID_SAMPLER);
917         CASE(INVALID_BINARY);
918         CASE(INVALID_BUILD_OPTIONS);
919         CASE(INVALID_PROGRAM);
920         CASE(INVALID_PROGRAM_EXECUTABLE);
921         CASE(INVALID_KERNEL_NAME);
922         CASE(INVALID_KERNEL_DEFINITION);
923         CASE(INVALID_KERNEL);
924         CASE(INVALID_ARG_INDEX);
925         CASE(INVALID_ARG_VALUE);
926         CASE(INVALID_ARG_SIZE);
927         CASE(INVALID_KERNEL_ARGS);
928         CASE(INVALID_WORK_DIMENSION);
929         CASE(INVALID_WORK_GROUP_SIZE);
930         CASE(INVALID_WORK_ITEM_SIZE);
931         CASE(INVALID_GLOBAL_OFFSET);
932         CASE(INVALID_EVENT_WAIT_LIST);
933         CASE(INVALID_EVENT);
934         CASE(INVALID_OPERATION);
935         CASE(INVALID_GL_OBJECT);
936         CASE(INVALID_BUFFER_SIZE);
937         CASE(INVALID_MIP_LEVEL);
938         CASE(INVALID_GLOBAL_WORK_SIZE);
939         default:
940             return "Unknown OpenCL error code";
941     }
942 #undef CASE
943 }
944 
isOpenCLEnabled()945 bool GPUEnv::isOpenCLEnabled()
946 {
947     return gpuEnv.mpDevID && gpuEnv.mpContext;
948 }
949 
950 }
951 
releaseOpenCLEnv(openclwrapper::GPUEnv * gpuInfo)952 void releaseOpenCLEnv( openclwrapper::GPUEnv *gpuInfo )
953 {
954     OpenCLZone zone;
955 
956     if ( !bIsInited )
957     {
958         return;
959     }
960 
961     for (_cl_command_queue* & i : openclwrapper::gpuEnv.mpCmdQueue)
962     {
963         if (i)
964         {
965             clReleaseCommandQueue(i);
966             i = nullptr;
967         }
968     }
969     openclwrapper::gpuEnv.mnCmdQueuePos = 0;
970 
971     if ( openclwrapper::gpuEnv.mpContext )
972     {
973         clReleaseContext( openclwrapper::gpuEnv.mpContext );
974         openclwrapper::gpuEnv.mpContext = nullptr;
975     }
976     bIsInited = false;
977     gpuInfo->mnIsUserCreated = 0;
978 }
979 
980 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
981