import config.package
import os


class Configure(config.package.Package):
    '''Package configurator for the CUDA toolkit.

    Registers the headers, libraries and functions that are probed for, checks
    that the CUDA compiler agrees with the C compiler on basic type layout, and
    extends CUDAFLAGS with the C++ dialect and -gencode options.
    '''

    def __init__(self, framework):
        '''Declare the version requirements, headers, libraries and symbols to probe for.'''
        config.package.Package.__init__(self, framework)
        self.minversion = '7.5'
        self.versionname = 'CUDA_VERSION'
        self.versioninclude = 'cuda.h'
        self.requiresversion = 1
        self.functions = ['cublasInit', 'cufftDestroy']
        self.includes = ['cublas.h', 'cufft.h', 'cusparse.h', 'cusolverDn.h', 'thrust/version.h']
        # First entry lists static archive names, second entry lists .lib names.
        self.liblist = [['libcufft.a', 'libcublas.a', 'libcudart.a', 'libcusparse.a', 'libcusolver.a'],
                        ['cufft.lib', 'cublas.lib', 'cudart.lib', 'cusparse.lib', 'cusolver.lib']]
        self.precisions = ['single', 'double']
        self.cxx = 0
        self.complex = 1
        self.hastests = 0
        self.hastestsdatafiles = 0
        return

    def setupHelp(self, help):
        '''Register the CUDA specific command line option on top of the generic package options.'''
        import nargs
        config.package.Package.setupHelp(self, help)
        help.addArgument('CUDA', '-with-cuda-gencodearch', nargs.ArgString(None, None, 'Cuda architecture for code generation, for example 70, (this may be used by external packages), use all to build a fat binary for distribution'))
        return

    def __str__(self):
        '''Return the base package summary, followed by the selected CUDA SM if one was set.'''
        output = config.package.Package.__str__(self)
        if hasattr(self, 'gencodearch'):
            output += '  CUDA SM ' + self.gencodearch + '\n'
        return output

    def setupDependencies(self, framework):
        '''Require the configure modules this package reads from.'''
        config.package.Package.setupDependencies(self, framework)
        self.scalarTypes = framework.require('PETSc.options.scalarTypes', self)
        self.compilers = framework.require('config.compilers', self)
        self.thrust = framework.require('config.packages.thrust', self)
        self.odeps = [self.thrust]  # if user supplies thrust, install it first
        return

    def _findSystemNvcc(self):
        '''Resolve the configured CUDA compiler to a full path.

        The result of the lookup is stored by getExecutable() as self.systemNvcc.
        Returns that path, or None when the attribute was not set.
        '''
        self.pushLanguage('CUDA')
        petscNvcc = self.getCompiler()
        self.popLanguage()
        self.getExecutable(petscNvcc, getFullPath=1, resultName='systemNvcc')
        return getattr(self, 'systemNvcc', None)

    def getSearchDirectories(self):
        '''Yield the directory two levels above the CUDA compiler executable, if it was found.'''
        nvcc = self._findSystemNvcc()
        if nvcc is not None:
            self.nvccDir = os.path.dirname(nvcc)
            self.cudaDir = os.path.split(self.nvccDir)[0]
            yield self.cudaDir
        return

    def checkSizeofVoidP(self):
        '''Checks if the CUDA compiler agrees with the C compiler on what size of void * should be'''
        self.log.write('Checking if sizeof(void*) in CUDA is the same as with regular compiler\n')
        size = self.types.checkSizeof('void *', (8, 4), lang='CUDA', save=False)
        if size != self.types.sizes['void-p']:
            raise RuntimeError('CUDA Error: sizeof(void*) with CUDA compiler is ' + str(size) + ' which differs from sizeof(void*) with C compiler')
        return

    def checkThrustVersion(self, minVer):
        '''Check if thrust version is >= minVer

        minVer is compared against the THRUST_VERSION preprocessor macro, so it
        must be given in that macro's integer encoding. Returns the result of
        the compile test.
        '''
        include = '#include <thrust/version.h> \n#if THRUST_VERSION < ' + str(minVer) + '\n#error "thrust version is too low"\n#endif\n'
        self.pushLanguage('CUDA')
        valid = self.checkCompile(include)
        self.popLanguage()
        return valid

    def configureTypes(self):
        '''Validate precision, pointer size and complex support, then add -std= to CUDAFLAGS.

        Raises RuntimeError when the precision is neither single nor double, or
        when complex scalars are requested without a user-supplied thrust and the
        C++ dialect or the toolkit's thrust version is insufficient.
        '''
        if self.getDefaultPrecision() not in ['double', 'single']:
            raise RuntimeError('Must use either single or double precision with CUDA')
        self.checkSizeofVoidP()
        # if no user-supplied thrust, check the system's complex ability
        if not self.thrust.found and self.scalarTypes.scalartype == 'complex':
            if self.compilers.cxxdialect not in ['C++11', 'C++14']:
                raise RuntimeError('CUDA Error: Using CUDA with PetscComplex requires a C++ dialect at least cxx11. Use --with-cxx-dialect=xxx to specify a proper one')
            if not self.checkThrustVersion(100908):
                raise RuntimeError('CUDA Error: The thrust library is too low to support PetscComplex. Use --download-thrust or --with-thrust-dir to give a thrust >= 1.9.8')
        # nvcc is a C++ compiler so it is always good to add -std=xxx.
        # It is even crucial when using thrust complex (see MR 2822)
        if self.compilers.cxxdialect in ['C++11', 'C++14']:
            self.setCompilers.CUDAFLAGS += ' -std=' + self.compilers.cxxdialect.lower()
        return

    def versionToStandardForm(self, ver):
        '''Converts from CUDA 7050 notation to standard notation 7.5'''
        number = int(ver)
        # The minor version occupies the digits between the thousands and the
        # units, so take the whole field rather than a single digit.
        return ".".join(map(str, [number // 1000, (number % 1000) // 10]))

    def checkNVCCDoubleAlign(self):
        '''Verify that a struct holding a double has the same size under the CUDA and C compilers.

        When 'known-cuda-align-double' is present in argDB its value is trusted
        instead of running the size tests. Raises RuntimeError on a mismatch.
        '''
        if 'known-cuda-align-double' in self.argDB:
            if not self.argDB['known-cuda-align-double']:
                raise RuntimeError('CUDA error: PETSC currently requires that CUDA double alignment match the C compiler')
        else:
            typedef = 'typedef struct {double a; int b;} teststruct;\n'
            cuda_size = self.types.checkSizeof('teststruct', (16, 12), lang='CUDA', codeBegin=typedef, save=False)
            c_size = self.types.checkSizeof('teststruct', (16, 12), lang='C', codeBegin=typedef, save=False)
            if c_size != cuda_size:
                raise RuntimeError('CUDA compiler error: memory alignment doesn\'t match C compiler (try adding -malign-double to compiler options)')
        return

    @staticmethod
    def _parseComputeCapability(output):
        '''Turn the first line of filtered deviceQuery output into an SM string.

        The last whitespace separated token of the first line is read as
        "major.minor" and returned as str(major*10 + minor), for example
        "7.5" -> "75" and "10.0" -> "100". Returns None when the text does not
        have that form.
        '''
        lines = output.splitlines()
        if not lines:
            return None
        fields = lines[0].split()
        if not fields:
            return None
        major, dot, minor = fields[-1].partition('.')
        if not dot:
            return None
        try:
            # Integer arithmetic avoids both the fixed three character slice
            # (wrong for two digit majors) and float rounding.
            return str(int(major) * 10 + int(minor))
        except ValueError:
            return None

    def _detectGencodeArch(self):
        '''Best-effort detection of the architecture by running deviceQuery.

        Looks for deviceQuery under <cuda>/extras/demo_suite relative to the CUDA
        compiler. Sets self.gencodearch on success; on any failure a message is
        logged and the attribute is left unset.
        '''
        nvcc = self._findSystemNvcc()
        if nvcc is None:
            return
        cudaDir = os.path.dirname(os.path.dirname(nvcc))
        dq = os.path.join(cudaDir, 'extras', 'demo_suite')
        self.getExecutable('deviceQuery', path=dq)
        if not hasattr(self, 'deviceQuery'):
            return
        try:
            (out, err, ret) = Configure.executeShellCommand(self.deviceQuery + ' | grep "CUDA Capability"', timeout=60, log=self.log, threads=1)
        except Exception as e:
            # Detection is optional: record why it failed and carry on, but do
            # not swallow KeyboardInterrupt/SystemExit as a bare except would.
            self.log.write('deviceQuery failed: ' + str(e) + '\n')
            return
        arch = self._parseComputeCapability(out)
        if arch is None:
            self.log.write('Unable to parse CUDA capability\n')
        else:
            self.gencodearch = arch
        return

    def configureLibrary(self):
        '''Run the generic library configuration, then the CUDA specific checks and flag setup.'''
        config.package.Package.configureLibrary(self)
        self.checkNVCCDoubleAlign()
        self.configureTypes()
        # includes from --download-thrust should override the prepackaged version in cuda -
        # so list thrust.include before cuda.include on the compile command.
        if self.thrust.found:
            self.log.write('Overriding the thrust library in CUDAToolkit with a user-specified one\n')
            self.include = self.thrust.include + self.include

        # An explicit command line value wins; otherwise try to query the device.
        if 'with-cuda-gencodearch' in self.framework.clArgDB:
            self.gencodearch = self.argDB['with-cuda-gencodearch']
        else:
            self._detectGencodeArch()

        if hasattr(self, 'gencodearch'):
            if self.gencodearch == 'all':
                for gen in ['52', '60', '61', '70', '75']:
                    self.setCompilers.CUDAFLAGS += ' -gencode arch=compute_' + gen + ',code=sm_' + gen + ' '
                    self.log.write(self.setCompilers.CUDAFLAGS + '\n')
            else:
                self.setCompilers.CUDAFLAGS += ' -gencode arch=compute_' + self.gencodearch + ',code=sm_' + self.gencodearch + ' '

        self.addDefine('HAVE_CUDA', '1')
        if not self.version_tuple:
            self.checkVersion()  # set version_tuple
        if self.version_tuple[0] >= 11:
            self.addDefine('HAVE_CUDA_VERSION_11PLUS', '1')
        return