1 /*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
9 *
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
14 *
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
32 *
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
35 */
36 /*! \internal \file
37 * \brief Define infrastructure for OpenCL JIT compilation for Gromacs
38 *
39 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
40 * \author Anca Hamuraru <anca@streamcomputing.eu>
41 * \author Teemu Virolainen <teemu@streamcomputing.eu>
42 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 */
44
45 #include "gmxpre.h"
46
47 #include "ocl_compiler.h"
48
49 #include "config.h"
50
51 #include <cstdio>
52
53 #include <algorithm>
54 #include <string>
55 #include <vector>
56
57 #include "gromacs/gpu_utils/oclutils.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/exceptions.h"
60 #include "gromacs/utility/gmxassert.h"
61 #include "gromacs/utility/path.h"
62 #include "gromacs/utility/programcontext.h"
63 #include "gromacs/utility/smalloc.h"
64 #include "gromacs/utility/stringutil.h"
65 #include "gromacs/utility/textreader.h"
66 #include "gromacs/utility/unique_cptr.h"
67
68 #include "ocl_caching.h"
69
70 namespace gmx
71 {
72 namespace ocl
73 {
74
75 /*! \brief True if OpenCL binary caching is enabled.
76 *
77 * Currently caching is disabled by default unless the env var override
78 * is used until we resolve concurrency issues. */
79 static bool useBuildCache = getenv("GMX_OCL_GENCACHE") != nullptr;
80
81 /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
82 *
83 * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
84 * variable is set or the compilation failed, then the OpenCL
85 * compilation log is written.
86 *
87 * \param fplog Open file pointer to log file
88 * \param program OpenCL program that was compiled
89 * \param deviceId Id of the device for which compilation took place
90 * \param kernelFilename File name containing the kernel
91 * \param preprocessorOptions String containing the preprocessor command-line options used for the
92 * build \param buildFailed Whether the OpenCL build succeeded
93 *
94 * \throws std::bad_alloc if out of memory */
writeOclBuildLog(FILE * fplog,cl_program program,cl_device_id deviceId,const std::string & kernelFilename,const std::string & preprocessorOptions,bool buildFailed)95 static void writeOclBuildLog(FILE* fplog,
96 cl_program program,
97 cl_device_id deviceId,
98 const std::string& kernelFilename,
99 const std::string& preprocessorOptions,
100 bool buildFailed)
101 {
102 bool writeOutput = ((fplog != nullptr) && (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
103
104 if (!writeOutput)
105 {
106 return;
107 }
108
109 // Get build log string size
110 size_t buildLogSize;
111 cl_int cl_error =
112 clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, 0, nullptr, &buildLogSize);
113 if (cl_error != CL_SUCCESS)
114 {
115 GMX_THROW(InternalError("Could not get OpenCL program build log size, error was "
116 + ocl_get_error_string(cl_error)));
117 }
118
119 char* buildLog = nullptr;
120 unique_cptr<char> buildLogGuard;
121 if (buildLogSize != 0)
122 {
123 /* Allocate memory to fit the build log,
124 it can be very large in case of errors */
125 snew(buildLog, buildLogSize);
126 buildLogGuard.reset(buildLog);
127
128 /* Get the actual compilation log */
129 cl_error = clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, buildLogSize,
130 buildLog, nullptr);
131 if (cl_error != CL_SUCCESS)
132 {
133 GMX_THROW(InternalError("Could not get OpenCL program build log, error was "
134 + ocl_get_error_string(cl_error)));
135 }
136 }
137
138 std::string message;
139 if (buildFailed)
140 {
141 message += "Compilation of source file " + kernelFilename + " failed!\n";
142 }
143 else
144 {
145 message += "Compilation of source file " + kernelFilename + " was successful!\n";
146 }
147 message += "-- Used build options: " + preprocessorOptions + "\n";
148 message += "--------------LOG START---------------\n";
149 message += buildLog;
150 message += "---------------LOG END----------------\n";
151 ;
152
153 fputs(message.c_str(), fplog);
154 }
155
156 /*! \brief Construct compiler options string
157 *
158 * \param deviceVendor Device vendor. Used to automatically enable some
159 * vendor-specific options.
160 * \return The string with the compiler options
161 */
selectCompilerOptions(DeviceVendor deviceVendor)162 static std::string selectCompilerOptions(DeviceVendor deviceVendor)
163 {
164 std::string compilerOptions;
165
166 if (getenv("GMX_OCL_NOOPT"))
167 {
168 compilerOptions += " -cl-opt-disable";
169 }
170
171 /* Fastmath imprves performance on all supported arch */
172 if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr)
173 {
174 compilerOptions += " -cl-fast-relaxed-math";
175
176 // Hint to the compiler that it can flush denorms to zero.
177 // In CUDA this is triggered by the -use_fast_math flag, equivalent with
178 // -cl-fast-relaxed-math, hence the inclusion on the conditional block.
179 compilerOptions += " -cl-denorms-are-zero";
180 }
181
182 if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_VERBOSE"))
183 {
184 compilerOptions += " -cl-nv-verbose";
185 }
186
187 if ((deviceVendor == DeviceVendor::Amd) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
188 {
189 /* To dump OpenCL build intermediate files, caching must be off */
190 if (!useBuildCache)
191 {
192 compilerOptions += " -save-temps";
193 }
194 }
195
196 if (getenv("GMX_OCL_DEBUG"))
197 {
198 compilerOptions += " -g";
199 }
200
201 return compilerOptions;
202 }
203
204 /*! \brief Get the path to the folder storing an OpenCL source file.
205 *
206 * By default, this function constructs the full path to the OpenCL from
207 * the known location of the binary that is running, so that we handle
208 * both in-source and installed builds. The user can override this
209 * behavior by defining GMX_OCL_FILE_PATH environment variable.
210 *
211 * \param[in] sourceRelativePath Relative path to the kernel or other file in the source tree,
212 * from src, e.g. "gromacs/mdlib/nbnxn_ocl" for NB kernels.
213 * \return OS-normalized path string to the folder storing OpenCL source file
214 *
215 * \throws std::bad_alloc if out of memory.
216 * FileIOError if GMX_OCL_FILE_PATH does not specify a readable path
217 */
getSourceRootPath(const std::string & sourceRelativePath)218 static std::string getSourceRootPath(const std::string& sourceRelativePath)
219 {
220 std::string sourceRootPath;
221 /* Use GMX_OCL_FILE_PATH if the user has defined it */
222 const char* gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
223
224 if (gmxOclFilePath == nullptr)
225 {
226 /* Normal way of getting ocl_root_dir. First get the right
227 root path from the path to the binary that is running. */
228 InstallationPrefixInfo info = getProgramContext().installationPrefix();
229 std::string dataPathSuffix = (info.bSourceLayout ? "src" : GMX_INSTALL_OCLDIR);
230 sourceRootPath = Path::join(info.path, dataPathSuffix, sourceRelativePath);
231 }
232 else
233 {
234 if (!Directory::exists(gmxOclFilePath))
235 {
236 GMX_THROW(FileIOError(
237 formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
238 "kernels are found, but '%s' does not exist",
239 gmxOclFilePath)));
240 }
241 sourceRootPath = Path::join(gmxOclFilePath, sourceRelativePath);
242 }
243
244 // Make sure we return an OS-correct path format
245 return Path::normalize(sourceRootPath);
246 }
247
getKernelWarpSize(cl_kernel kernel,cl_device_id deviceId)248 size_t getKernelWarpSize(cl_kernel kernel, cl_device_id deviceId)
249 {
250 size_t warpSize = 0;
251 cl_int cl_error =
252 clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
253 sizeof(warpSize), &warpSize, nullptr);
254 if (cl_error != CL_SUCCESS)
255 {
256 GMX_THROW(InternalError("Could not query OpenCL preferred workgroup size, error was "
257 + ocl_get_error_string(cl_error)));
258 }
259 if (warpSize == 0)
260 {
261 GMX_THROW(InternalError(formatString("Invalid OpenCL warp size encountered")));
262 }
263 return warpSize;
264 }
265
getDeviceWarpSize(cl_context context,cl_device_id deviceId)266 size_t getDeviceWarpSize(cl_context context, cl_device_id deviceId)
267 {
268 cl_int cl_error;
269 const char* warpSizeKernel =
270 "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
271 cl_program program = clCreateProgramWithSource(context, 1, &warpSizeKernel, nullptr, &cl_error);
272 if (cl_error != CL_SUCCESS)
273 {
274 GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was "
275 + ocl_get_error_string(cl_error)));
276 }
277
278 cl_error = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
279 if (cl_error != CL_SUCCESS)
280 {
281 GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was "
282 + ocl_get_error_string(cl_error)));
283 }
284
285 cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
286 if (cl_error != CL_SUCCESS)
287 {
288 GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was "
289 + ocl_get_error_string(cl_error)));
290 }
291
292 size_t warpSize = getKernelWarpSize(kernel, deviceId);
293
294 cl_error = clReleaseKernel(kernel);
295 if (cl_error != CL_SUCCESS)
296 {
297 GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was "
298 + ocl_get_error_string(cl_error)));
299 }
300 cl_error = clReleaseProgram(program);
301 if (cl_error != CL_SUCCESS)
302 {
303 GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was "
304 + ocl_get_error_string(cl_error)));
305 }
306
307 return warpSize;
308 }
309
310 /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
311 *
312 * \param[in] deviceVendor Vendor id enumerator
313 *
314 * \return The appropriate compilation-line define
315 */
makeVendorFlavorChoice(DeviceVendor deviceVendor)316 static std::string makeVendorFlavorChoice(DeviceVendor deviceVendor)
317 {
318 switch (deviceVendor)
319 {
320 case DeviceVendor::Amd: return "-D_AMD_SOURCE_";
321 case DeviceVendor::Nvidia: return "-D_NVIDIA_SOURCE_";
322 case DeviceVendor::Intel: return "-D_INTEL_SOURCE_";
323 default: return "";
324 }
325 }
326
327 /*! \brief Create include paths for kernel sources.
328 *
329 * All OpenCL kernel files are expected to be stored in one single folder.
330 *
331 * \throws std::bad_alloc if out of memory.
332 */
makeKernelIncludePathOption(const std::string & unescapedKernelRootPath)333 static std::string makeKernelIncludePathOption(const std::string& unescapedKernelRootPath)
334 {
335 std::string includePathOption;
336
337 /* Apple does not seem to accept the quoted include paths other
338 * OpenCL implementations are happy with. Since the standard still says
339 * it should be quoted, we handle Apple as a special case.
340 */
341 #ifdef __APPLE__
342 includePathOption += "-I";
343
344 // Prepend all the spaces with a backslash
345 for (std::string::size_type i = 0; i < unescapedKernelRootPath.length(); i++)
346 {
347 if (unescapedKernelRootPath[i] == ' ')
348 {
349 includePathOption.push_back('\\');
350 }
351 includePathOption.push_back(unescapedKernelRootPath[i]);
352 }
353 #else
354 includePathOption += "-I\"" + unescapedKernelRootPath + "\"";
355 #endif
356
357 return includePathOption;
358 }
359
360 /*! \brief Replace duplicated spaces with a single one in string
361 *
362 * Only the first character will be kept for multiple adjacent characters that
363 * are both identical and where the first one returns true for isspace().
364 *
365 * \param str String that will be modified.
366 */
removeExtraSpaces(std::string * str)367 static void removeExtraSpaces(std::string* str)
368 {
369 GMX_RELEASE_ASSERT(str != nullptr, "A pointer to an actual string must be provided");
370 std::string::iterator newEnd = std::unique(
371 str->begin(), str->end(), [=](char a, char b) { return isspace(a) != 0 && (a == b); });
372 str->erase(newEnd, str->end());
373 }
374
375 /*! \brief Builds a string with build options for the OpenCL kernels
376 *
377 * \throws std::bad_alloc if out of memory. */
makePreprocessorOptions(const std::string & kernelRootPath,const std::string & includeRootPath,size_t warpSize,DeviceVendor deviceVendor,const std::string & extraDefines)378 static std::string makePreprocessorOptions(const std::string& kernelRootPath,
379 const std::string& includeRootPath,
380 size_t warpSize,
381 DeviceVendor deviceVendor,
382 const std::string& extraDefines)
383 {
384 std::string preprocessorOptions;
385
386 /* Compose the complete build options */
387 preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
388 preprocessorOptions += ' ';
389 preprocessorOptions += makeVendorFlavorChoice(deviceVendor);
390 preprocessorOptions += ' ';
391 preprocessorOptions += extraDefines;
392 preprocessorOptions += ' ';
393 preprocessorOptions += selectCompilerOptions(deviceVendor);
394 preprocessorOptions += ' ';
395 preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
396 preprocessorOptions += ' ';
397 preprocessorOptions += makeKernelIncludePathOption(includeRootPath);
398
399 // Mac OS (and maybe some other implementations) does not accept double spaces in options
400 removeExtraSpaces(&preprocessorOptions);
401
402 return preprocessorOptions;
403 }
404
compileProgram(FILE * fplog,const std::string & kernelRelativePath,const std::string & kernelBaseFilename,const std::string & extraDefines,cl_context context,cl_device_id deviceId,DeviceVendor deviceVendor)405 cl_program compileProgram(FILE* fplog,
406 const std::string& kernelRelativePath,
407 const std::string& kernelBaseFilename,
408 const std::string& extraDefines,
409 cl_context context,
410 cl_device_id deviceId,
411 DeviceVendor deviceVendor)
412 {
413 cl_int cl_error;
414 // Let the kernel find include files from its module.
415 std::string kernelRootPath = getSourceRootPath(kernelRelativePath);
416 // Let the kernel find include files from other modules.
417 std::string rootPath = getSourceRootPath("");
418
419 GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");
420
421 /* Load OpenCL source files */
422 std::string kernelFilename = Path::join(kernelRootPath, kernelBaseFilename);
423
424 /* Make the build options */
425 std::string preprocessorOptions = makePreprocessorOptions(
426 kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendor, extraDefines);
427
428 bool buildCacheWasRead = false;
429
430 std::string cacheFilename;
431 if (useBuildCache)
432 {
433 cacheFilename = makeBinaryCacheFilename(kernelBaseFilename, deviceId);
434 }
435
436 /* Create OpenCL program */
437 cl_program program = nullptr;
438 if (useBuildCache)
439 {
440 if (File::exists(cacheFilename, File::returnFalseOnError))
441 {
442 /* Check if there's a valid cache available */
443 try
444 {
445 program = makeProgramFromCache(cacheFilename, context, deviceId);
446 buildCacheWasRead = true;
447 }
448 catch (FileIOError& e)
449 {
450 // Failing to read from the cache is not a critical error
451 formatExceptionMessageToFile(fplog, e);
452 }
453 fprintf(fplog, "OpenCL binary cache file %s is present, will load kernels.\n",
454 cacheFilename.c_str());
455 }
456 else
457 {
458 fprintf(fplog,
459 "No OpenCL binary cache file was present for %s, so will compile kernels "
460 "normally.\n",
461 kernelBaseFilename.c_str());
462 }
463 }
464 if (program == nullptr)
465 {
466 // Compile OpenCL program from source
467 std::string kernelSource = TextReader::readFileToString(kernelFilename);
468 if (kernelSource.empty())
469 {
470 GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
471 }
472 const char* kernelSourcePtr = kernelSource.c_str();
473 size_t kernelSourceSize = kernelSource.size();
474 /* Create program from source code */
475 program = clCreateProgramWithSource(context, 1, &kernelSourcePtr, &kernelSourceSize, &cl_error);
476 if (cl_error != CL_SUCCESS)
477 {
478 GMX_THROW(InternalError("Could not create OpenCL program, error was "
479 + ocl_get_error_string(cl_error)));
480 }
481 }
482
483 /* Build the OpenCL program, keeping the status to potentially
484 write to the simulation log file. */
485 cl_int buildStatus =
486 clBuildProgram(program, 0, nullptr, preprocessorOptions.c_str(), nullptr, nullptr);
487
488 /* Write log first, and then throw exception that the user know what is
489 the issue even if the build fails. */
490 writeOclBuildLog(fplog, program, deviceId, kernelFilename, preprocessorOptions,
491 buildStatus != CL_SUCCESS);
492
493 if (buildStatus != CL_SUCCESS)
494 {
495 GMX_THROW(InternalError("Could not build OpenCL program, error was "
496 + ocl_get_error_string(buildStatus)));
497 }
498
499 if (useBuildCache)
500 {
501 if (!buildCacheWasRead)
502 {
503 /* If OpenCL caching is ON, but the current cache is not
504 valid => update it */
505 try
506 {
507 writeBinaryToCache(program, cacheFilename);
508 }
509 catch (GromacsException& e)
510 {
511 // Failing to write the cache is not a critical error
512 formatExceptionMessageToFile(fplog, e);
513 }
514 }
515 }
516 if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
517 {
518 /* If dumping intermediate files has been requested and this is an NVIDIA card
519 => write PTX to file */
520 char buffer[STRLEN];
521
522 cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, nullptr);
523 if (cl_error != CL_SUCCESS)
524 {
525 GMX_THROW(InternalError("Could not get OpenCL device info, error was "
526 + ocl_get_error_string(cl_error)));
527 }
528 std::string ptxFilename = buffer;
529 ptxFilename += ".ptx";
530
531 try
532 {
533 writeBinaryToCache(program, ptxFilename);
534 }
535 catch (GromacsException& e)
536 {
537 // Failing to write the cache is not a critical error
538 formatExceptionMessageToFile(fplog, e);
539 }
540 }
541
542 return program;
543 }
544
545 } // namespace ocl
546 } // namespace gmx
547