import config.package
import os

class Configure(config.package.Package):
  def __init__(self, framework):
    config.package.Package.__init__(self, framework)
    self.minversion        = '7.5'
    self.versionname       = 'CUDA_VERSION'
    self.versioninclude    = 'cuda.h'
    self.requiresversion   = 1
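    # representative functions, headers, and candidate library sets used by the generic package tests to detect the CUDA toolkit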
    self.functions         = ['cublasInit', 'cufftDestroy']
    self.includes          = ['cublas.h','cufft.h','cusparse.h','cusolverDn.h','thrust/version.h']
    self.liblist           = [['libcufft.a', 'libcublas.a','libcudart.a','libcusparse.a','libcusolver.a'],
                              ['cufft.lib','cublas.lib','cudart.lib','cusparse.lib','cusolver.lib']]
    self.precisions        = ['single','double']
    self.cxx               = 0
    self.complex           = 1
    self.hastests          = 0
    self.hastestsdatafiles = 0
    return

  def setupHelp(self, help):
    import nargs
    config.package.Package.setupHelp(self, help)
    help.addArgument('CUDA', '-with-cuda-gencodearch', nargs.ArgString(None, None, 'CUDA architecture for code generation, for example 70 (this may be used by external packages); use "all" to build a fat binary for distribution'))
    return

  def __str__(self):
    output  = config.package.Package.__str__(self)
    if hasattr(self,'gencodearch'):
      output += '  CUDA SM '+self.gencodearch+'\n'
    return output

  def setupDependencies(self, framework):
    config.package.Package.setupDependencies(self, framework)
    self.scalarTypes  = framework.require('PETSc.options.scalarTypes',self)
    self.compilers    = framework.require('config.compilers',self)
    self.thrust       = framework.require('config.packages.thrust',self)
    self.odeps        = [self.thrust] # if the user supplies thrust, install it first
    return

  def getSearchDirectories(self):
    import os
    self.pushLanguage('CUDA')
    petscNvcc = self.getCompiler()
    self.popLanguage()
    self.getExecutable(petscNvcc,getFullPath=1,resultName='systemNvcc')
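    # nvcc normally lives in <cuda-root>/bin, so the toolkit root is the parent of the compiler's directory (e.g. /usr/local/cuda)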
    if hasattr(self,'systemNvcc'):
      self.nvccDir = os.path.dirname(self.systemNvcc)
      self.cudaDir = os.path.split(self.nvccDir)[0]
      yield self.cudaDir
    return

  def checkSizeofVoidP(self):
    '''Checks if the CUDA compiler agrees with the C compiler on what size of void * should be'''
    self.log.write('Checking if sizeof(void*) in CUDA is the same as with regular compiler\n')
    size = self.types.checkSizeof('void *', (8, 4), lang='CUDA', save=False)
    if size != self.types.sizes['void-p']:
      raise RuntimeError('CUDA Error: sizeof(void*) with CUDA compiler is ' + str(size) + ' which differs from sizeof(void*) with C compiler')
    return

  def checkThrustVersion(self,minVer):
    '''Check if the thrust version is >= minVer'''
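    # THRUST_VERSION encodes major*100000 + minor*100 + subminor, so 100908 corresponds to thrust 1.9.8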
    include = '#include <thrust/version.h> \n#if THRUST_VERSION < ' + str(minVer) + '\n#error "thrust version is too low"\n#endif\n'
    self.pushLanguage('CUDA')
    valid = self.checkCompile(include)
    self.popLanguage()
    return valid

  def configureTypes(self):
    import config.setCompilers
    if not self.getDefaultPrecision() in ['double', 'single']:
      raise RuntimeError('Must use either single or double precision with CUDA')
    self.checkSizeofVoidP()
    if not self.thrust.found and self.scalarTypes.scalartype == 'complex': # if no user-supplied thrust, check the system's ability to handle complex
      if not self.compilers.cxxdialect in ['C++11','C++14']:
        raise RuntimeError('CUDA Error: Using CUDA with PetscComplex requires a C++ dialect of at least C++11. Use --with-cxx-dialect=xxx to specify a proper one')
      if not self.checkThrustVersion(100908):
        raise RuntimeError('CUDA Error: The thrust library version is too low to support PetscComplex. Use --download-thrust or --with-thrust-dir to provide a thrust >= 1.9.8')
    if self.compilers.cxxdialect in ['C++11','C++14']: # nvcc is a C++ compiler, so it is always good to add -std=xxx; it is even crucial when using thrust complex (see MR 2822)
      self.setCompilers.CUDAFLAGS += ' -std=' + self.compilers.cxxdialect.lower()
    return

  def versionToStandardForm(self,ver):
    '''Converts from CUDA 7050 notation to standard notation 7.5'''
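    # CUDA_VERSION is 1000*major + 10*minor, so 7050 becomes 7.5 and 10010 becomes 10.1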
    return ".".join(map(str,[int(ver)//1000, int(ver)//10%10]))

  def checkNVCCDoubleAlign(self):
    if 'known-cuda-align-double' in self.argDB:
      if not self.argDB['known-cuda-align-double']:
        raise RuntimeError('CUDA error: PETSc currently requires that CUDA double alignment match the C compiler')
    else:
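      # the test struct is 16 bytes when double forces 8-byte alignment (4 bytes of padding follow the int) and 12 bytes when it does not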
      typedef = 'typedef struct {double a; int b;} teststruct;\n'
      cuda_size = self.types.checkSizeof('teststruct', (16, 12), lang='CUDA', codeBegin=typedef, save=False)
      c_size = self.types.checkSizeof('teststruct', (16, 12), lang='C', codeBegin=typedef, save=False)
      if c_size != cuda_size:
        raise RuntimeError('CUDA compiler error: memory alignment doesn\'t match C compiler (try adding -malign-double to compiler options)')
    return

  def configureLibrary(self):
    config.package.Package.configureLibrary(self)
    self.checkNVCCDoubleAlign()
    self.configureTypes()
    # includes from --download-thrust should override the version prepackaged with CUDA, so list thrust.include before cuda.include on the compile command
    if self.thrust.found:
      self.log.write('Overriding the thrust library in the CUDA toolkit with a user-specified one\n')
      self.include = self.thrust.include+self.include

    if 'with-cuda-gencodearch' in self.framework.clArgDB:
      self.gencodearch = self.argDB['with-cuda-gencodearch']
    else:
      import os
      self.pushLanguage('CUDA')
      petscNvcc = self.getCompiler()
      self.popLanguage()
      self.getExecutable(petscNvcc,getFullPath=1,resultName='systemNvcc')
      if hasattr(self,'systemNvcc'):
        cudaDir = os.path.dirname(os.path.dirname(self.systemNvcc))
        dq = os.path.join(cudaDir,'extras','demo_suite')
        self.getExecutable('deviceQuery',path = dq)
        if hasattr(self,'deviceQuery'):
          try:
            (out, err, ret) = Configure.executeShellCommand(self.deviceQuery + ' | grep "CUDA Capability"',timeout = 60, log = self.log, threads = 1)
          except:
            self.log.write('deviceQuery failed\n')
          else:
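            # the first matching line ends with the capability (e.g. 7.5); take its last three characters and multiply by 10 to get the gencode arch (e.g. '75')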
            try:
              out = out.split('\n')[0]
              sm = out[-3:]
              self.gencodearch = str(int(10*float(sm)))
            except:
              self.log.write('Unable to parse CUDA capability\n')

    if hasattr(self,'gencodearch'):
      if self.gencodearch == 'all':
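        # fat binary covering common architectures: Maxwell (52), Pascal (60, 61), Volta (70), and Turing (75)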
        for gen in ['52','60','61','70','75']:
          self.setCompilers.CUDAFLAGS += ' -gencode arch=compute_'+gen+',code=sm_'+gen+' '
          self.log.write(self.setCompilers.CUDAFLAGS+'\n')
      else:
        self.setCompilers.CUDAFLAGS += ' -gencode arch=compute_'+self.gencodearch+',code=sm_'+self.gencodearch+' '

    self.addDefine('HAVE_CUDA','1')
    if not self.version_tuple:
      self.checkVersion() # set version_tuple
    if self.version_tuple[0] >= 11:
      self.addDefine('HAVE_CUDA_VERSION_11PLUS','1')
    return