# Copyright 2012-2017 The Meson development team

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os.path
import typing as T
from functools import partial

from .. import coredata
from .. import mlog
from ..mesonlib import EnvironmentException, MachineChoice, Popen_safe, OptionOverrideProxy, is_windows, LibType
from .compilers import (Compiler, cuda_buildtype_args, cuda_optimization_args,
                        cuda_debug_args)

if T.TYPE_CHECKING:
    from ..environment import Environment  # noqa: F401
    from ..envconfig import MachineInfo


class CudaCompiler(Compiler):
    """Compiler wrapper for nvcc that delegates most decisions to a host compiler.

    Flags obtained from the wrapped ``host_compiler`` are rewritten into
    ``-Xcompiler=...`` / ``-Xlinker=...`` form before being returned, except
    for the small set of flags listed in ``_universal_flags``.
    """

    LINKER_PREFIX = '-Xlinker='
    language = 'cuda'

    # Two-character flag prefixes that are passed through unwrapped, per phase.
    _universal_flags = {'compiler': ['-I', '-D', '-U', '-E'], 'linker': ['-l', '-L']}

    def __init__(self, exelist, version, for_machine: MachineChoice,
                 is_cross, exe_wrapper, host_compiler, info: 'MachineInfo', **kwargs):
        """Store the cross/wrapper settings and mirror the host compiler's options.

        ``base_options`` is taken from ``host_compiler`` as-is, and
        ``warn_args`` is the host compiler's table with every flag list
        converted through ``_to_host_flags``.
        """
        super().__init__(exelist, version, for_machine, info, **kwargs)
        self.is_cross = is_cross
        self.exe_wrapper = exe_wrapper
        self.host_compiler = host_compiler
        self.base_options = host_compiler.base_options
        self.id = 'nvcc'
        self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()}

    @classmethod
    def _to_host_flags(cls, flags, phase='compiler'):
        """Return a new list with ``_to_host_flag`` applied to every item of *flags*."""
        return list(map(partial(cls._to_host_flag, phase=phase), flags))

    @classmethod
    def _to_host_flag(cls, flag, phase):
        """Wrap a single host flag as ``-X<phase>=<flag>`` when required.

        A flag is returned unchanged when it does not start with ``-`` or
        ``/`` (this includes the empty string), or when its first two
        characters appear in ``_universal_flags[phase]``.
        """
        # startswith() instead of flag[0] so an empty flag does not raise
        # IndexError; behaviour for non-empty flags is the same.
        if not flag.startswith(('-', '/')) or flag[:2] in cls._universal_flags[phase]:
            return flag

        return '-X{}={}'.format(phase, flag)

    def needs_static_linker(self):
        """Always ``False``."""
        return False

    def get_always_args(self):
        """Return an empty list: no flags are always added."""
        return []

    def get_no_stdinc_args(self):
        """Return an empty list."""
        return []

    def thread_link_flags(self, environment):
        """Return the host compiler's thread link flags converted by ``_to_host_flags``."""
        return self._to_host_flags(self.host_compiler.thread_link_flags(environment))

    def sanity_check(self, work_dir, environment):
        """Compile (and, when possible, run) a small CUDA program in *work_dir*.

        The program prints ``major.minor`` for each device it can query on
        stdout, or the ``cudaGetDeviceCount`` error code on stderr. After a
        successful run, ``self.detected_cc`` is set to the program's stdout if
        its stderr was empty; otherwise it stays ``''``.

        Raises:
            EnvironmentException: if compilation fails, or if the resulting
                binary is run and exits with a non-zero status.
        """
        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))

        sname = 'sanitycheckcuda.cu'
        code = r'''
        #include <cuda_runtime.h>
        #include <stdio.h>

        __global__ void kernel (void) {}

        int main(void){
            struct cudaDeviceProp prop;
            int count, i;
            cudaError_t ret = cudaGetDeviceCount(&count);
            if(ret != cudaSuccess){
                fprintf(stderr, "%d\n", (int)ret);
            }else{
                for(i=0;i<count;i++){
                    if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
                        fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
                    }
                }
            }
            fflush(stderr);
            fflush(stdout);
            return 0;
        }
        '''
        binname = sname.rsplit('.', 1)[0]
        binname += '_cross' if self.is_cross else ''
        source_name = os.path.join(work_dir, sname)
        binary_name = os.path.join(work_dir, binname + '.exe')
        # Explicit encoding so the written source does not depend on the
        # locale's default encoding.
        with open(source_name, 'w', encoding='utf-8') as ofile:
            ofile.write(code)

        # The Sanity Test for CUDA language will serve as both a sanity test
        # and a native-build GPU architecture detection test, useful later.
        #
        # For this second purpose, NVCC has very handy flags, --run and
        # --run-args, that allow one to run an application with the
        # environment set up properly. Of course, this only works for native
        # builds; For cross builds we must still use the exe_wrapper (if any).
        self.detected_cc = ''
        flags = ['-w', '-cudart', 'static', source_name]
        if self.is_cross and self.exe_wrapper is None:
            # Linking cross built apps is painful. You can't really
            # tell if you should use -nostdlib or not and for example
            # on OSX the compiler binary is the same but you need
            # a ton of compiler flags to differentiate between
            # arm and x86_64. So just compile.
            flags += self.get_compile_only_args()
        flags += self.get_output_args(binary_name)

        # Compile sanity check
        cmdlist = self.exelist + flags
        mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
        mlog.debug('Sanity check compile stdout: ')
        mlog.debug(stdo)
        mlog.debug('-----\nSanity check compile stderr:')
        mlog.debug(stde)
        mlog.debug('-----')
        if pc.returncode != 0:
            raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))

        # Run sanity check (if possible)
        if self.is_cross:
            if self.exe_wrapper is None:
                # Nothing can execute the cross-built object; compile-only
                # success is all that can be verified.
                return
            else:
                cmdlist = self.exe_wrapper + [binary_name]
        else:
            cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
        mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
        pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
        mlog.debug('Sanity check run stdout: ')
        mlog.debug(stdo)
        mlog.debug('-----\nSanity check run stderr:')
        mlog.debug(stde)
        mlog.debug('-----')
        pe.wait()
        if pe.returncode != 0:
            raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))

        # Interpret the result of the sanity test.
        # As mentioned above, it is not only a sanity test but also a GPU
        # architecture detection test.
        if stde == '':
            self.detected_cc = stdo
        else:
            mlog.debug('cudaGetDeviceCount() returned ' + stde)

    def has_header_symbol(self, hname, symbol, prefix, env, extra_args=None, dependencies=None):
        """Check whether *symbol* is available after including *hname*.

        The base-class check is tried first; if it fails, a second program
        that names the symbol in a ``using`` declaration is compiled.
        """
        result, cached = super().has_header_symbol(hname, symbol, prefix, env, extra_args, dependencies)
        if result:
            return True, cached
        if extra_args is None:
            extra_args = []
        fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol}
        t = '''{prefix}
        #include <{header}>
        using {symbol};
        int main(void) {{ return 0; }}'''
        return self.compiles(t.format(**fargs), env, extra_args, dependencies)

    def get_options(self):
        """Return the base-class options extended with the ``cuda_std`` combo option."""
        opts = super().get_options()
        opts.update({'cuda_std': coredata.UserComboOption('C++ language standard to use',
                                                          ['none', 'c++03', 'c++11', 'c++14'],
                                                          'none')})
        return opts

    def _to_host_compiler_options(self, options):
        """Build an ``OptionOverrideProxy`` over the host compiler's options.

        Every option in *options* contributes its ``.value`` as an override.
        """
        # NOTE(review): .copy() is kept deliberately; presumably *options* may
        # be a proxy object that only exposes its contents via copy() - confirm.
        overrides = {name: opt.value for name, opt in options.copy().items()}
        return OptionOverrideProxy(overrides, self.host_compiler.get_options())

    def get_option_compile_args(self, options):
        """Return ``--std=`` (when applicable) plus the host compiler's option args."""
        args = []
        # On Windows, the version of the C++ standard used by nvcc is dictated by
        # the combination of CUDA version and MSVC version; the --std= is thus ignored
        # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
        if not is_windows():
            std = options['cuda_std']
            if std.value != 'none':
                args.append('--std=' + std.value)

        return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options)))

    @classmethod
    def _cook_link_args(cls, args: T.List[str]) -> T.List[str]:
        """Prepare host link arguments for nvcc.

        A leading GNU-style ``-Wl,`` prefix is stripped, each space is
        backslash-escaped, and the result is converted with
        ``_to_host_flags(..., 'linker')``.
        """
        cooked = []  # type: T.List[str]
        for arg in args:
            if arg.startswith('-Wl,'):  # strip GNU-style -Wl prefix
                arg = arg.replace('-Wl,', '', 1)
            # Escape whitespace. The replacement must keep the space itself:
            # substituting a bare backslash would silently join the two
            # halves of the argument.
            arg = arg.replace(' ', '\\ ')
            cooked.append(arg)
        return cls._to_host_flags(cooked, 'linker')

    def get_option_link_args(self, options):
        """Return the host compiler's option link args, cooked for nvcc."""
        return self._cook_link_args(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)))

    def name_string(self):
        """Return the compiler command line (``exelist``) joined with spaces."""
        return ' '.join(self.exelist)

    def get_soname_args(self, *args):
        """Return the host compiler's soname args, cooked for nvcc."""
        return self._cook_link_args(self.host_compiler.get_soname_args(*args))

    def get_dependency_gen_args(self, outtarget, outfile):
        """Return an empty list: no dependency-generation flags are emitted."""
        return []

    def get_compile_only_args(self):
        """Return ``['-c']``."""
        return ['-c']

    def get_no_optimization_args(self):
        """Return ``['-O0']``."""
        return ['-O0']

    def get_optimization_args(self, optimization_level):
        """Look up *optimization_level* in ``cuda_optimization_args``."""
        # alternatively, consider simply redirecting this to the host compiler, which would
        # give us more control over options like "optimize for space" (which nvcc doesn't support):
        # return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level))
        return cuda_optimization_args[optimization_level]

    def get_debug_args(self, is_debug):
        """Look up *is_debug* in ``cuda_debug_args``."""
        return cuda_debug_args[is_debug]

    def get_werror_args(self):
        """Return the single ``-Werror=...`` flag used for warnings-as-errors."""
        return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']

    def get_warn_args(self, level):
        """Return the converted host warning flags stored for *level*."""
        return self.warn_args[level]

    def get_buildtype_args(self, buildtype):
        """Return ``cuda_buildtype_args[buildtype]`` plus converted host buildtype args."""
        # nvcc doesn't support msvc's "Edit and Continue" PDB format; "downgrade" to
        # a regular PDB to avoid cl's warning to that effect (D9025 : overriding '/ZI' with '/Zi')
        host_args = ['/Zi' if arg == '/ZI' else arg for arg in self.host_compiler.get_buildtype_args(buildtype)]
        return cuda_buildtype_args[buildtype] + self._to_host_flags(host_args)

    def get_include_args(self, path, is_system):
        """Return ``['-I' + path]``, substituting ``'.'`` for an empty *path*.

        *is_system* is accepted but not used.
        """
        if path == '':
            path = '.'
        return ['-I' + path]

    def get_compile_debugfile_args(self, rel_obj, **kwargs):
        """Return the host compiler's compile debug-file args, converted for nvcc."""
        return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, **kwargs))

    def get_link_debugfile_args(self, targetfile):
        """Return the host compiler's link debug-file args, cooked for nvcc."""
        return self._cook_link_args(self.host_compiler.get_link_debugfile_args(targetfile))

    def depfile_for_object(self, objfile):
        """Return *objfile* with ``'.'`` and the depfile suffix appended."""
        return objfile + '.' + self.get_depfile_suffix()

    def get_depfile_suffix(self):
        """Return ``'d'``."""
        return 'd'

    def get_buildtype_linker_args(self, buildtype):
        """Return the host compiler's buildtype linker args, cooked for nvcc."""
        return self._cook_link_args(self.host_compiler.get_buildtype_linker_args(buildtype))

    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
                         rpath_paths: str, build_rpath: str,
                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
        """Delegate to the host compiler and cook the returned rpath args.

        The second element of the host compiler's result is returned unchanged.
        """
        (rpath_args, rpath_dirs_to_remove) = self.host_compiler.build_rpath_args(
            env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
        return (self._cook_link_args(rpath_args), rpath_dirs_to_remove)

    def linker_to_compiler_args(self, args):
        """Return *args* unchanged."""
        return args

    def get_pic_args(self):
        """Return the host compiler's PIC args, converted for nvcc."""
        return self._to_host_flags(self.host_compiler.get_pic_args())

    def compute_parameters_with_absolute_paths(self, parameter_list, build_dir):
        """Return an empty list; both arguments are ignored."""
        return []

    def get_output_args(self, target: str) -> T.List[str]:
        """Return ``['-o', target]``."""
        return ['-o', target]

    def get_std_exe_link_args(self) -> T.List[str]:
        """Return the host compiler's standard executable link args, cooked for nvcc."""
        return self._cook_link_args(self.host_compiler.get_std_exe_link_args())

    def find_library(self, libname, env, extra_dirs, libtype: LibType = LibType.PREFER_SHARED):
        """Return ``['-l' + libname]`` without performing any lookup."""
        return ['-l' + libname]  # FIXME

    def get_crt_compile_args(self, crt_val, buildtype):
        """Return the host compiler's CRT compile args, converted for nvcc."""
        return self._to_host_flags(self.host_compiler.get_crt_compile_args(crt_val, buildtype))

    def get_crt_link_args(self, crt_val, buildtype):
        """Return cooked CRT link args, adding a ``/NODEFAULTLIB`` override when needed."""
        # nvcc defaults to static, release version of msvc runtime and provides no
        # native option to override it; override it with /NODEFAULTLIB
        host_link_arg_overrides = []
        host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
        if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args):
            host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib']
        return self._cook_link_args(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype))

    def get_target_link_args(self, target):
        """Return the base-class target link args, cooked for nvcc."""
        return self._cook_link_args(super().get_target_link_args(target))

    def get_dependency_compile_args(self, dep):
        """Return the base-class dependency compile args, converted for nvcc."""
        return self._to_host_flags(super().get_dependency_compile_args(dep))

    def get_dependency_link_args(self, dep):
        """Return the base-class dependency link args, cooked for nvcc."""
        return self._cook_link_args(super().get_dependency_link_args(dep))