1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5  * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *  \brief Define infrastructure for OpenCL JIT compilation for Gromacs
38  *
39  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
40  *  \author Anca Hamuraru <anca@streamcomputing.eu>
41  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
42  *  \author Mark Abraham <mark.j.abraham@gmail.com>
43  */
44 
45 #include "gmxpre.h"
46 
47 #include "ocl_compiler.h"
48 
49 #include "config.h"
50 
51 #include <cstdio>
52 
53 #include <algorithm>
54 #include <string>
55 #include <vector>
56 
57 #include "gromacs/gpu_utils/oclutils.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/exceptions.h"
60 #include "gromacs/utility/gmxassert.h"
61 #include "gromacs/utility/path.h"
62 #include "gromacs/utility/programcontext.h"
63 #include "gromacs/utility/smalloc.h"
64 #include "gromacs/utility/stringutil.h"
65 #include "gromacs/utility/textreader.h"
66 #include "gromacs/utility/unique_cptr.h"
67 
68 #include "ocl_caching.h"
69 
70 namespace gmx
71 {
72 namespace ocl
73 {
74 
75 /*! \brief True if OpenCL binary caching is enabled.
76  *
77  *  Currently caching is disabled by default unless the env var override
78  *  is used until we resolve concurrency issues. */
79 static bool useBuildCache = getenv("GMX_OCL_GENCACHE") != nullptr;
80 
81 /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
82  *
83  * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
84  * variable is set or the compilation failed, then the OpenCL
85  * compilation log is written.
86  *
87  * \param fplog               Open file pointer to log file
88  * \param program             OpenCL program that was compiled
89  * \param deviceId            Id of the device for which compilation took place
90  * \param kernelFilename      File name containing the kernel
91  * \param preprocessorOptions String containing the preprocessor command-line options used for the
92  * build \param buildFailed         Whether the OpenCL build succeeded
93  *
94  * \throws std::bad_alloc if out of memory */
writeOclBuildLog(FILE * fplog,cl_program program,cl_device_id deviceId,const std::string & kernelFilename,const std::string & preprocessorOptions,bool buildFailed)95 static void writeOclBuildLog(FILE*              fplog,
96                              cl_program         program,
97                              cl_device_id       deviceId,
98                              const std::string& kernelFilename,
99                              const std::string& preprocessorOptions,
100                              bool               buildFailed)
101 {
102     bool writeOutput = ((fplog != nullptr) && (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
103 
104     if (!writeOutput)
105     {
106         return;
107     }
108 
109     // Get build log string size
110     size_t buildLogSize;
111     cl_int cl_error =
112             clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, 0, nullptr, &buildLogSize);
113     if (cl_error != CL_SUCCESS)
114     {
115         GMX_THROW(InternalError("Could not get OpenCL program build log size, error was "
116                                 + ocl_get_error_string(cl_error)));
117     }
118 
119     char*             buildLog = nullptr;
120     unique_cptr<char> buildLogGuard;
121     if (buildLogSize != 0)
122     {
123         /* Allocate memory to fit the build log,
124            it can be very large in case of errors */
125         snew(buildLog, buildLogSize);
126         buildLogGuard.reset(buildLog);
127 
128         /* Get the actual compilation log */
129         cl_error = clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, buildLogSize,
130                                          buildLog, nullptr);
131         if (cl_error != CL_SUCCESS)
132         {
133             GMX_THROW(InternalError("Could not get OpenCL program build log, error was "
134                                     + ocl_get_error_string(cl_error)));
135         }
136     }
137 
138     std::string message;
139     if (buildFailed)
140     {
141         message += "Compilation of source file " + kernelFilename + " failed!\n";
142     }
143     else
144     {
145         message += "Compilation of source file " + kernelFilename + " was successful!\n";
146     }
147     message += "-- Used build options: " + preprocessorOptions + "\n";
148     message += "--------------LOG START---------------\n";
149     message += buildLog;
150     message += "---------------LOG END----------------\n";
151     ;
152 
153     fputs(message.c_str(), fplog);
154 }
155 
156 /*! \brief Construct compiler options string
157  *
158  * \param deviceVendor  Device vendor. Used to automatically enable some
159  *                      vendor-specific options.
160  * \return The string with the compiler options
161  */
selectCompilerOptions(DeviceVendor deviceVendor)162 static std::string selectCompilerOptions(DeviceVendor deviceVendor)
163 {
164     std::string compilerOptions;
165 
166     if (getenv("GMX_OCL_NOOPT"))
167     {
168         compilerOptions += " -cl-opt-disable";
169     }
170 
171     /* Fastmath imprves performance on all supported arch */
172     if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr)
173     {
174         compilerOptions += " -cl-fast-relaxed-math";
175 
176         // Hint to the compiler that it can flush denorms to zero.
177         // In CUDA this is triggered by the -use_fast_math flag, equivalent with
178         // -cl-fast-relaxed-math, hence the inclusion on the conditional block.
179         compilerOptions += " -cl-denorms-are-zero";
180     }
181 
182     if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_VERBOSE"))
183     {
184         compilerOptions += " -cl-nv-verbose";
185     }
186 
187     if ((deviceVendor == DeviceVendor::Amd) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
188     {
189         /* To dump OpenCL build intermediate files, caching must be off */
190         if (!useBuildCache)
191         {
192             compilerOptions += " -save-temps";
193         }
194     }
195 
196     if (getenv("GMX_OCL_DEBUG"))
197     {
198         compilerOptions += " -g";
199     }
200 
201     return compilerOptions;
202 }
203 
204 /*! \brief Get the path to the folder storing an OpenCL source file.
205  *
206  * By default, this function constructs the full path to the OpenCL from
207  * the known location of the binary that is running, so that we handle
208  * both in-source and installed builds. The user can override this
209  * behavior by defining GMX_OCL_FILE_PATH environment variable.
210  *
211  * \param[in] sourceRelativePath    Relative path to the kernel or other file in the source tree,
212  *                                  from src, e.g. "gromacs/mdlib/nbnxn_ocl" for NB kernels.
213  * \return OS-normalized path string to the folder storing OpenCL source file
214  *
215  * \throws std::bad_alloc    if out of memory.
216  *         FileIOError  if GMX_OCL_FILE_PATH does not specify a readable path
217  */
getSourceRootPath(const std::string & sourceRelativePath)218 static std::string getSourceRootPath(const std::string& sourceRelativePath)
219 {
220     std::string sourceRootPath;
221     /* Use GMX_OCL_FILE_PATH if the user has defined it */
222     const char* gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
223 
224     if (gmxOclFilePath == nullptr)
225     {
226         /* Normal way of getting ocl_root_dir. First get the right
227            root path from the path to the binary that is running. */
228         InstallationPrefixInfo info           = getProgramContext().installationPrefix();
229         std::string            dataPathSuffix = (info.bSourceLayout ? "src" : GMX_INSTALL_OCLDIR);
230         sourceRootPath = Path::join(info.path, dataPathSuffix, sourceRelativePath);
231     }
232     else
233     {
234         if (!Directory::exists(gmxOclFilePath))
235         {
236             GMX_THROW(FileIOError(
237                     formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
238                                  "kernels are found, but '%s' does not exist",
239                                  gmxOclFilePath)));
240         }
241         sourceRootPath = Path::join(gmxOclFilePath, sourceRelativePath);
242     }
243 
244     // Make sure we return an OS-correct path format
245     return Path::normalize(sourceRootPath);
246 }
247 
getKernelWarpSize(cl_kernel kernel,cl_device_id deviceId)248 size_t getKernelWarpSize(cl_kernel kernel, cl_device_id deviceId)
249 {
250     size_t warpSize = 0;
251     cl_int cl_error =
252             clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
253                                      sizeof(warpSize), &warpSize, nullptr);
254     if (cl_error != CL_SUCCESS)
255     {
256         GMX_THROW(InternalError("Could not query OpenCL preferred workgroup size, error was "
257                                 + ocl_get_error_string(cl_error)));
258     }
259     if (warpSize == 0)
260     {
261         GMX_THROW(InternalError(formatString("Invalid OpenCL warp size encountered")));
262     }
263     return warpSize;
264 }
265 
getDeviceWarpSize(cl_context context,cl_device_id deviceId)266 size_t getDeviceWarpSize(cl_context context, cl_device_id deviceId)
267 {
268     cl_int      cl_error;
269     const char* warpSizeKernel =
270             "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
271     cl_program program = clCreateProgramWithSource(context, 1, &warpSizeKernel, nullptr, &cl_error);
272     if (cl_error != CL_SUCCESS)
273     {
274         GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was "
275                                 + ocl_get_error_string(cl_error)));
276     }
277 
278     cl_error = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
279     if (cl_error != CL_SUCCESS)
280     {
281         GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was "
282                                 + ocl_get_error_string(cl_error)));
283     }
284 
285     cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
286     if (cl_error != CL_SUCCESS)
287     {
288         GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was "
289                                 + ocl_get_error_string(cl_error)));
290     }
291 
292     size_t warpSize = getKernelWarpSize(kernel, deviceId);
293 
294     cl_error = clReleaseKernel(kernel);
295     if (cl_error != CL_SUCCESS)
296     {
297         GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was "
298                                 + ocl_get_error_string(cl_error)));
299     }
300     cl_error = clReleaseProgram(program);
301     if (cl_error != CL_SUCCESS)
302     {
303         GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was "
304                                 + ocl_get_error_string(cl_error)));
305     }
306 
307     return warpSize;
308 }
309 
310 /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
311  *
312  * \param[in] deviceVendor Vendor id enumerator
313  *
314  * \return The appropriate compilation-line define
315  */
makeVendorFlavorChoice(DeviceVendor deviceVendor)316 static std::string makeVendorFlavorChoice(DeviceVendor deviceVendor)
317 {
318     switch (deviceVendor)
319     {
320         case DeviceVendor::Amd: return "-D_AMD_SOURCE_";
321         case DeviceVendor::Nvidia: return "-D_NVIDIA_SOURCE_";
322         case DeviceVendor::Intel: return "-D_INTEL_SOURCE_";
323         default: return "";
324     }
325 }
326 
327 /*! \brief Create include paths for kernel sources.
328  *
329  * All OpenCL kernel files are expected to be stored in one single folder.
330  *
331  * \throws std::bad_alloc  if out of memory.
332  */
makeKernelIncludePathOption(const std::string & unescapedKernelRootPath)333 static std::string makeKernelIncludePathOption(const std::string& unescapedKernelRootPath)
334 {
335     std::string includePathOption;
336 
337     /* Apple does not seem to accept the quoted include paths other
338      * OpenCL implementations are happy with. Since the standard still says
339      * it should be quoted, we handle Apple as a special case.
340      */
341 #ifdef __APPLE__
342     includePathOption += "-I";
343 
344     // Prepend all the spaces with a backslash
345     for (std::string::size_type i = 0; i < unescapedKernelRootPath.length(); i++)
346     {
347         if (unescapedKernelRootPath[i] == ' ')
348         {
349             includePathOption.push_back('\\');
350         }
351         includePathOption.push_back(unescapedKernelRootPath[i]);
352     }
353 #else
354     includePathOption += "-I\"" + unescapedKernelRootPath + "\"";
355 #endif
356 
357     return includePathOption;
358 }
359 
360 /*! \brief Replace duplicated spaces with a single one in string
361  *
362  * Only the first character will be kept for multiple adjacent characters that
363  * are both identical and where the first one returns true for isspace().
364  *
365  * \param str String that will be modified.
366  */
removeExtraSpaces(std::string * str)367 static void removeExtraSpaces(std::string* str)
368 {
369     GMX_RELEASE_ASSERT(str != nullptr, "A pointer to an actual string must be provided");
370     std::string::iterator newEnd = std::unique(
371             str->begin(), str->end(), [=](char a, char b) { return isspace(a) != 0 && (a == b); });
372     str->erase(newEnd, str->end());
373 }
374 
375 /*! \brief Builds a string with build options for the OpenCL kernels
376  *
377  * \throws std::bad_alloc  if out of memory. */
makePreprocessorOptions(const std::string & kernelRootPath,const std::string & includeRootPath,size_t warpSize,DeviceVendor deviceVendor,const std::string & extraDefines)378 static std::string makePreprocessorOptions(const std::string& kernelRootPath,
379                                            const std::string& includeRootPath,
380                                            size_t             warpSize,
381                                            DeviceVendor       deviceVendor,
382                                            const std::string& extraDefines)
383 {
384     std::string preprocessorOptions;
385 
386     /* Compose the complete build options */
387     preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
388     preprocessorOptions += ' ';
389     preprocessorOptions += makeVendorFlavorChoice(deviceVendor);
390     preprocessorOptions += ' ';
391     preprocessorOptions += extraDefines;
392     preprocessorOptions += ' ';
393     preprocessorOptions += selectCompilerOptions(deviceVendor);
394     preprocessorOptions += ' ';
395     preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
396     preprocessorOptions += ' ';
397     preprocessorOptions += makeKernelIncludePathOption(includeRootPath);
398 
399     // Mac OS (and maybe some other implementations) does not accept double spaces in options
400     removeExtraSpaces(&preprocessorOptions);
401 
402     return preprocessorOptions;
403 }
404 
compileProgram(FILE * fplog,const std::string & kernelRelativePath,const std::string & kernelBaseFilename,const std::string & extraDefines,cl_context context,cl_device_id deviceId,DeviceVendor deviceVendor)405 cl_program compileProgram(FILE*              fplog,
406                           const std::string& kernelRelativePath,
407                           const std::string& kernelBaseFilename,
408                           const std::string& extraDefines,
409                           cl_context         context,
410                           cl_device_id       deviceId,
411                           DeviceVendor       deviceVendor)
412 {
413     cl_int cl_error;
414     // Let the kernel find include files from its module.
415     std::string kernelRootPath = getSourceRootPath(kernelRelativePath);
416     // Let the kernel find include files from other modules.
417     std::string rootPath = getSourceRootPath("");
418 
419     GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");
420 
421     /* Load OpenCL source files */
422     std::string kernelFilename = Path::join(kernelRootPath, kernelBaseFilename);
423 
424     /* Make the build options */
425     std::string preprocessorOptions = makePreprocessorOptions(
426             kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendor, extraDefines);
427 
428     bool buildCacheWasRead = false;
429 
430     std::string cacheFilename;
431     if (useBuildCache)
432     {
433         cacheFilename = makeBinaryCacheFilename(kernelBaseFilename, deviceId);
434     }
435 
436     /* Create OpenCL program */
437     cl_program program = nullptr;
438     if (useBuildCache)
439     {
440         if (File::exists(cacheFilename, File::returnFalseOnError))
441         {
442             /* Check if there's a valid cache available */
443             try
444             {
445                 program           = makeProgramFromCache(cacheFilename, context, deviceId);
446                 buildCacheWasRead = true;
447             }
448             catch (FileIOError& e)
449             {
450                 // Failing to read from the cache is not a critical error
451                 formatExceptionMessageToFile(fplog, e);
452             }
453             fprintf(fplog, "OpenCL binary cache file %s is present, will load kernels.\n",
454                     cacheFilename.c_str());
455         }
456         else
457         {
458             fprintf(fplog,
459                     "No OpenCL binary cache file was present for %s, so will compile kernels "
460                     "normally.\n",
461                     kernelBaseFilename.c_str());
462         }
463     }
464     if (program == nullptr)
465     {
466         // Compile OpenCL program from source
467         std::string kernelSource = TextReader::readFileToString(kernelFilename);
468         if (kernelSource.empty())
469         {
470             GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
471         }
472         const char* kernelSourcePtr  = kernelSource.c_str();
473         size_t      kernelSourceSize = kernelSource.size();
474         /* Create program from source code */
475         program = clCreateProgramWithSource(context, 1, &kernelSourcePtr, &kernelSourceSize, &cl_error);
476         if (cl_error != CL_SUCCESS)
477         {
478             GMX_THROW(InternalError("Could not create OpenCL program, error was "
479                                     + ocl_get_error_string(cl_error)));
480         }
481     }
482 
483     /* Build the OpenCL program, keeping the status to potentially
484        write to the simulation log file. */
485     cl_int buildStatus =
486             clBuildProgram(program, 0, nullptr, preprocessorOptions.c_str(), nullptr, nullptr);
487 
488     /* Write log first, and then throw exception that the user know what is
489        the issue even if the build fails. */
490     writeOclBuildLog(fplog, program, deviceId, kernelFilename, preprocessorOptions,
491                      buildStatus != CL_SUCCESS);
492 
493     if (buildStatus != CL_SUCCESS)
494     {
495         GMX_THROW(InternalError("Could not build OpenCL program, error was "
496                                 + ocl_get_error_string(buildStatus)));
497     }
498 
499     if (useBuildCache)
500     {
501         if (!buildCacheWasRead)
502         {
503             /* If OpenCL caching is ON, but the current cache is not
504                valid => update it */
505             try
506             {
507                 writeBinaryToCache(program, cacheFilename);
508             }
509             catch (GromacsException& e)
510             {
511                 // Failing to write the cache is not a critical error
512                 formatExceptionMessageToFile(fplog, e);
513             }
514         }
515     }
516     if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
517     {
518         /* If dumping intermediate files has been requested and this is an NVIDIA card
519            => write PTX to file */
520         char buffer[STRLEN];
521 
522         cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, nullptr);
523         if (cl_error != CL_SUCCESS)
524         {
525             GMX_THROW(InternalError("Could not get OpenCL device info, error was "
526                                     + ocl_get_error_string(cl_error)));
527         }
528         std::string ptxFilename = buffer;
529         ptxFilename += ".ptx";
530 
531         try
532         {
533             writeBinaryToCache(program, ptxFilename);
534         }
535         catch (GromacsException& e)
536         {
537             // Failing to write the cache is not a critical error
538             formatExceptionMessageToFile(fplog, e);
539         }
540     }
541 
542     return program;
543 }
544 
545 } // namespace ocl
546 } // namespace gmx
547