# Copyright 2012-2017 The Meson development team

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum
import os.path
import string
import typing as T

from .. import coredata
from .. import mlog
from ..mesonlib import (
    EnvironmentException, MachineChoice, Popen_safe, OptionOverrideProxy,
    is_windows, LibType, OptionKey,
)
from .compilers import (Compiler, cuda_buildtype_args, cuda_optimization_args,
                        cuda_debug_args, CompileCheckMode)

if T.TYPE_CHECKING:
    from ..build import BuildTarget
    from ..coredata import KeyedOptionDictType
    from ..dependencies import Dependency
    from ..environment import Environment  # noqa: F401
    from ..envconfig import MachineInfo
    from ..linkers import DynamicLinker
    from ..programs import ExternalProgram


class _Phase(enum.Enum):
    """Build phase a translated flag is destined for.

    The value is spliced into the NVCC pass-through option name
    (``-Xcompiler`` / ``-Xlinker``) by ``CudaCompiler._to_host_flags``.
    """

    COMPILER = 'compiler'
    LINKER = 'linker'


class CudaCompiler(Compiler):
    """NVCC compiler wrapper that delegates most decisions to a host compiler.

    Flags produced by the host compiler ("GCC-speak") are translated into
    flags NVCC accepts by ``_to_host_flags``.
    """

    LINKER_PREFIX = '-Xlinker='
    language = 'cuda'

    # NVCC flags taking no arguments.
    _FLAG_PASSTHRU_NOARGS = {
        # NVCC --long-option,                   NVCC -short-option              CUDA Toolkit 11.2.1 Reference
        '--objdir-as-tempdir',                  '-objtemp',                     # 4.2.1.2
        '--generate-dependency-targets',        '-MP',                          # 4.2.1.12
        '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.1.14
        '--link',                                                               # 4.2.2.1
        '--lib',                                '-lib',                         # 4.2.2.2
        '--device-link',                        '-dlink',                       # 4.2.2.3
        '--device-c',                           '-dc',                          # 4.2.2.4
        '--device-w',                           '-dw',                          # 4.2.2.5
        '--cuda',                               '-cuda',                        # 4.2.2.6
        '--compile',                            '-c',                           # 4.2.2.7
        '--fatbin',                             '-fatbin',                      # 4.2.2.8
        '--cubin',                              '-cubin',                       # 4.2.2.9
        '--ptx',                                '-ptx',                         # 4.2.2.10
        '--preprocess',                         '-E',                           # 4.2.2.11
        '--generate-dependencies',              '-M',                           # 4.2.2.12
        '--generate-nonsystem-dependencies',    '-MM',                          # 4.2.2.13
        '--generate-dependencies-with-compile', '-MD',                          # 4.2.2.14
        '--generate-nonsystem-dependencies-with-compile', '-MMD',               # 4.2.2.15
        '--run',                                                                # 4.2.2.16
        '--profile',                            '-pg',                          # 4.2.3.1
        '--debug',                              '-g',                           # 4.2.3.2
        '--device-debug',                       '-G',                           # 4.2.3.3
        '--extensible-whole-program',           '-ewp',                         # 4.2.3.4
        '--generate-line-info',                 '-lineinfo',                    # 4.2.3.5
        '--dlink-time-opt',                     '-dlto',                        # 4.2.3.8
        '--no-exceptions',                      '-noeh',                        # 4.2.3.11
        '--shared',                             '-shared',                      # 4.2.3.12
        '--no-host-device-initializer-list',    '-nohdinitlist',                # 4.2.3.15
        '--expt-relaxed-constexpr',             '-expt-relaxed-constexpr',      # 4.2.3.16
        '--extended-lambda',                    '-extended-lambda',             # 4.2.3.17
        '--expt-extended-lambda',               '-expt-extended-lambda',        # 4.2.3.18
        '--m32',                                '-m32',                         # 4.2.3.20
        '--m64',                                '-m64',                         # 4.2.3.21
        '--forward-unknown-to-host-compiler',   '-forward-unknown-to-host-compiler',  # 4.2.5.1
        '--forward-unknown-to-host-linker',     '-forward-unknown-to-host-linker',    # 4.2.5.2
        '--dont-use-profile',                   '-noprof',                      # 4.2.5.3
        '--dryrun',                             '-dryrun',                      # 4.2.5.5
        '--verbose',                            '-v',                           # 4.2.5.6
        '--keep',                               '-keep',                        # 4.2.5.7
        '--save-temps',                         '-save-temps',                  # 4.2.5.9
        '--clean-targets',                      '-clean',                       # 4.2.5.10
        '--no-align-double',                                                    # 4.2.5.16
        '--no-device-link',                     '-nodlink',                     # 4.2.5.17
        '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.5.18
        '--use_fast_math',                      '-use_fast_math',               # 4.2.7.7
        '--extra-device-vectorization',         '-extra-device-vectorization',  # 4.2.7.12
        '--compile-as-tools-patch',             '-astoolspatch',                # 4.2.7.13
        '--keep-device-functions',              '-keep-device-functions',       # 4.2.7.14
        '--disable-warnings',                   '-w',                           # 4.2.8.1
        '--source-in-ptx',                      '-src-in-ptx',                  # 4.2.8.2
        '--restrict',                           '-restrict',                    # 4.2.8.3
        '--Wno-deprecated-gpu-targets',         '-Wno-deprecated-gpu-targets',  # 4.2.8.4
        '--Wno-deprecated-declarations',        '-Wno-deprecated-declarations', # 4.2.8.5
        '--Wreorder',                           '-Wreorder',                    # 4.2.8.6
        '--Wdefault-stream-launch',             '-Wdefault-stream-launch',      # 4.2.8.7
        '--Wext-lambda-captures-this',          '-Wext-lambda-captures-this',   # 4.2.8.8
        '--display-error-number',               '-err-no',                      # 4.2.8.10
        '--resource-usage',                     '-res-usage',                   # 4.2.8.14
        '--help',                               '-h',                           # 4.2.8.15
        '--version',                            '-V',                           # 4.2.8.16
        '--list-gpu-code',                      '-code-ls',                     # 4.2.8.20
        '--list-gpu-arch',                      '-arch-ls',                     # 4.2.8.21
    }
    # Dictionary of NVCC flags taking either one argument or a comma-separated list.
    # Maps --long to -short options, because the short options are more GCC-like.
    _FLAG_LONG2SHORT_WITHARGS = {
        '--output-file':                '-o',                           # 4.2.1.1
        '--pre-include':                '-include',                     # 4.2.1.3
        '--library':                    '-l',                           # 4.2.1.4
        '--define-macro':               '-D',                           # 4.2.1.5
        '--undefine-macro':             '-U',                           # 4.2.1.6
        '--include-path':               '-I',                           # 4.2.1.7
        '--system-include':             '-isystem',                     # 4.2.1.8
        '--library-path':               '-L',                           # 4.2.1.9
        '--output-directory':           '-odir',                        # 4.2.1.10
        '--dependency-output':          '-MF',                          # 4.2.1.11
        '--compiler-bindir':            '-ccbin',                       # 4.2.1.13
        '--archiver-binary':            '-arbin',                       # 4.2.1.15
        '--cudart':                     '-cudart',                      # 4.2.1.16
        '--cudadevrt':                  '-cudadevrt',                   # 4.2.1.17
        '--libdevice-directory':        '-ldir',                        # 4.2.1.18
        '--target-directory':           '-target-dir',                  # 4.2.1.19
        '--optimization-info':          '-opt-info',                    # 4.2.3.6
        '--optimize':                   '-O',                           # 4.2.3.7
        '--ftemplate-backtrace-limit':  '-ftemplate-backtrace-limit',   # 4.2.3.9
        '--ftemplate-depth':            '-ftemplate-depth',             # 4.2.3.10
        '--x':                          '-x',                           # 4.2.3.13
        '--std':                        '-std',                         # 4.2.3.14
        '--machine':                    '-m',                           # 4.2.3.19
        '--compiler-options':           '-Xcompiler',                   # 4.2.4.1
        '--linker-options':             '-Xlinker',                     # 4.2.4.2
        '--archive-options':            '-Xarchive',                    # 4.2.4.3
        '--ptxas-options':              '-Xptxas',                      # 4.2.4.4
        '--nvlink-options':             '-Xnvlink',                     # 4.2.4.5
        '--threads':                    '-t',                           # 4.2.5.4
        '--keep-dir':                   '-keep-dir',                    # 4.2.5.8
        '--run-args':                   '-run-args',                    # 4.2.5.11
        '--input-drive-prefix':         '-idp',                         # 4.2.5.12
        '--dependency-drive-prefix':    '-ddp',                         # 4.2.5.13
        '--drive-prefix':               '-dp',                          # 4.2.5.14
        '--dependency-target-name':     '-MT',                          # 4.2.5.15
        '--default-stream':             '-default-stream',              # 4.2.6.1
        '--gpu-architecture':           '-arch',                        # 4.2.7.1
        '--gpu-code':                   '-code',                        # 4.2.7.2
        '--generate-code':              '-gencode',                     # 4.2.7.3
        '--relocatable-device-code':    '-rdc',                         # 4.2.7.4
        '--entries':                    '-e',                           # 4.2.7.5
        '--maxrregcount':               '-maxrregcount',                # 4.2.7.6
        '--ftz':                        '-ftz',                         # 4.2.7.8
        '--prec-div':                   '-prec-div',                    # 4.2.7.9
        '--prec-sqrt':                  '-prec-sqrt',                   # 4.2.7.10
        '--fmad':                       '-fmad',                        # 4.2.7.11
        '--Werror':                     '-Werror',                      # 4.2.8.9
        '--diag-error':                 '-diag-error',                  # 4.2.8.11
        '--diag-suppress':              '-diag-suppress',               # 4.2.8.12
        '--diag-warn':                  '-diag-warn',                   # 4.2.8.13
        '--options-file':               '-optf',                        # 4.2.8.17
        '--time':                       '-time',                        # 4.2.8.18
        '--qpp-config':                 '-qpp-config',                  # 4.2.8.19
    }
    # Reverse map -short to --long options.
    _FLAG_SHORT2LONG_WITHARGS = {v: k for k, v in _FLAG_LONG2SHORT_WITHARGS.items()}

    def __init__(self, exelist: T.List[str], version: str, for_machine: MachineChoice,
                 is_cross: bool, exe_wrapper: T.Optional['ExternalProgram'],
                 host_compiler: Compiler, info: 'MachineInfo',
                 linker: T.Optional['DynamicLinker'] = None,
                 full_version: T.Optional[str] = None):
        """Set up the NVCC wrapper around ``host_compiler``.

        Base options are shared with the host compiler, and the host
        compiler's warning flags are translated to NVCC form once, here.
        """
        super().__init__(exelist, version, for_machine, info, linker=linker, full_version=full_version, is_cross=is_cross)
        self.exe_wrapper = exe_wrapper
        self.host_compiler = host_compiler
        self.base_options = host_compiler.base_options
        self.id = 'nvcc'
        self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()}

    @classmethod
    def _shield_nvcc_list_arg(cls, arg: str, listmode: bool = True) -> str:
        r"""
        Shield an argument against both splitting by NVCC's list-argument
        parse logic, and interpretation by any shell.

        NVCC seems to consider every comma , that is neither escaped by \ nor inside
        a double-quoted string a split-point. Single-quotes do not provide protection
        against splitting; In fact, after splitting they are \-escaped. Unfortunately,
        double-quotes don't protect against shell expansion. What follows is a
        complex dance to accommodate everybody.

        :param arg: the raw argument value to protect.
        :param listmode: when True (the default) and ``arg`` contains a comma,
            apply the comma-splitting rule and re-join the pieces with ``\,``.
        :return: the shielded argument.
        """

        SQ = "'"
        DQ = '"'
        CM = ","
        BS = "\\"
        DQSQ = DQ+SQ+DQ
        quotable = set(string.whitespace+'"$`\\')

        if CM not in arg or not listmode:
            if SQ not in arg:
                # If any of the special characters "$`\ or whitespace are present, single-quote.
                # Otherwise return bare.
                if set(arg).intersection(quotable):
                    return SQ+arg+SQ
                else:
                    return arg  # Easy case: no splits, no quoting.
            else:
                # There are single quotes. Double-quote them, and single-quote the
                # strings between them.
                #
                # The segments are joined with "'" *between* them. The previous
                # implementation sliced the list of [segment, "'"] pairs with
                # [:-1], which discarded the final segment along with the
                # trailing separator (e.g. a'b lost its "b").
                segments = [cls._shield_nvcc_list_arg(s) for s in arg.split(SQ)]
                return DQSQ.join(segments)
        else:
            # A comma is present, and list mode was active.
            # We apply (what we guess is) the (primitive) NVCC splitting rule:
            l = ['']
            instring = False
            argit = iter(arg)
            for c in argit:
                if c == CM and not instring:
                    l.append('')
                elif c == DQ:
                    l[-1] += c
                    instring = not instring
                elif c == BS:
                    # Drop the backslash and take the next character verbatim;
                    # a trailing lone backslash ends the scan.
                    try:
                        l[-1] += next(argit)
                    except StopIteration:
                        break
                else:
                    l[-1] += c

            # Shield individual strings, without listmode, then return them with
            # escaped commas between them.
            l = [cls._shield_nvcc_list_arg(s, listmode=False) for s in l]
            return r'\,'.join(l)

    @classmethod
    def _merge_flags(cls, flags: T.List[str]) -> T.List[str]:
        r"""
        The flags to NVCC get exceedingly verbose and unreadable when too many of them
        are shielded with -Xcompiler. Merge consecutive -Xcompiler-wrapped arguments
        into one.

        :param flags: already-translated NVCC flags.
        :return: the flags with each run of consecutive ``-Xcompiler`` values
            collapsed into one comma-separated value. Lists of length 0 or 1
            are returned unchanged (the same object).
        """
        if len(flags) <= 1:
            return flags
        flagit = iter(flags)
        xflags = []

        def is_xcompiler_flag_isolated(flag: str) -> bool:
            return flag == '-Xcompiler'

        def is_xcompiler_flag_glued(flag: str) -> bool:
            return flag.startswith('-Xcompiler=')

        def is_xcompiler_flag(flag: str) -> bool:
            return is_xcompiler_flag_isolated(flag) or is_xcompiler_flag_glued(flag)

        def get_xcompiler_val(flag: str, flagit: T.Iterator[str]) -> str:
            # Glued form carries its value; isolated form consumes the next flag.
            if is_xcompiler_flag_glued(flag):
                return flag[len('-Xcompiler='):]
            else:
                try:
                    return next(flagit)
                except StopIteration:
                    return ""

        ingroup = False
        for flag in flagit:
            if not is_xcompiler_flag(flag):
                ingroup = False
                xflags.append(flag)
            elif ingroup:
                # Continue the current group: append to the last emitted value.
                xflags[-1] += ','
                xflags[-1] += get_xcompiler_val(flag, flagit)
            elif is_xcompiler_flag_isolated(flag):
                ingroup = True
                xflags.append(flag)
                xflags.append(get_xcompiler_val(flag, flagit))
            elif is_xcompiler_flag_glued(flag):
                ingroup = True
                xflags.append(flag)
            else:
                raise ValueError("-Xcompiler flag merging failed, unknown argument form!")
        return xflags

    def _to_host_flags(self, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
        """
        Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags.

        NVCC's "short" flags have broad similarities to the GCC standard, but have
        gratuitous, irritating differences.

        :param flags: flags in host-compiler or NVCC notation.
        :param phase: selects ``-Xcompiler`` or ``-Xlinker`` for arguments that
            start with ``/``.
        :return: flags in NVCC notation, with consecutive ``-Xcompiler``
            arguments merged.
        :raises EnvironmentException: if a flag that requires a value is the
            last element of ``flags``.
        """

        xflags = []
        flagit = iter(flags)

        for flag in flagit:
            # The CUDA Toolkit Documentation, in 4.1. Command Option Types and Notation,
            # specifies that NVCC does not parse the standard flags as GCC does. It has
            # its own strategy, to wit:
            #
            #     nvcc recognizes three types of command options: boolean options, single
            #     value options, and list options.
            #
            #     Boolean options do not have an argument; they are either specified on a
            #     command line or not. Single value options must be specified at most once,
            #     and list options may be repeated. Examples of each of these option types
            #     are, respectively: --verbose (switch to verbose mode), --output-file
            #     (specify output file), and --include-path (specify include path).
            #
            #     Single value options and list options must have arguments, which must
            #     follow the name of the option itself by either one or more spaces or an
            #     equals character. When a one-character short name such as -I, -l, and -L
            #     is used, the value of the option may also immediately follow the option
            #     itself without being separated by spaces or an equal character. The
            #     individual values of list options may be separated by commas in a single
            #     instance of the option, or the option may be repeated, or any
            #     combination of these two cases.
            #
            # One strange consequence of this choice is that directory and filenames that
            # contain commas (',') cannot be passed to NVCC (at least, not as easily as
            # in GCC). Another strange consequence is that it is legal to supply flags
            # such as
            #
            #     -lpthread,rt,dl,util
            #     -l pthread,rt,dl,util
            #     -l=pthread,rt,dl,util
            #
            # and each of the above alternatives is equivalent to GCC-speak
            #
            #     -lpthread -lrt -ldl -lutil
            #     -l pthread -l rt -l dl -l util
            #     -l=pthread -l=rt -l=dl -l=util
            #
            # *With the exception of commas in the name*, GCC-speak for these list flags
            # is a strict subset of NVCC-speak, so we passthrough those flags.
            #
            # The -D macro-define flag is documented as somehow shielding commas from
            # splitting a definition. Balanced parentheses, braces and single-quotes
            # around the comma are not sufficient, but balanced double-quotes are. The
            # shielding appears to work with -l, -I, -L flags as well, for instance.
            #
            # Since our goal is to replicate GCC-speak as much as possible, we check for
            # commas in all list-arguments and shield them with double-quotes. We make
            # an exception for -D (where this would be value-changing) and -U (because
            # it isn't possible to define a macro with a comma in the name).

            if flag in self._FLAG_PASSTHRU_NOARGS:
                xflags.append(flag)
                continue

            # Reset for every flag, so that a missing argument is detected below
            # rather than silently reusing the value parsed for an earlier flag
            # (or failing with an UnboundLocalError on the first one).
            val: T.Optional[str] = None

            # Handle breakup of flag-values into a flag-part and value-part.
            if flag[:1] not in '-/':
                # This is not a flag. It's probably a file input. Pass it through.
                xflags.append(flag)
                continue
            elif flag[:1] == '/':
                # This is ambiguously either an MSVC-style /switch or an absolute path
                # to a file. For some magical reason the following works acceptably in
                # both cases.
                wrap = '"' if ',' in flag else ''
                xflags.append(f'-X{phase.value}={wrap}{flag}{wrap}')
                continue
            elif len(flag) >= 2 and flag[0] == '-' and flag[1] in 'IDULlmOxmte':
                # This is a single-letter short option. These options (with the
                # exception of -o) are allowed to receive their argument with neither
                # space nor = sign before them. Detect and separate them in that event.
                if flag[2:3] == '':            # -I something
                    val = next(flagit, None)
                elif flag[2:3] == '=':         # -I=something
                    val = flag[3:]
                else:                          # -Isomething
                    val = flag[2:]
                flag = flag[:2]                # -I
            elif flag in self._FLAG_LONG2SHORT_WITHARGS or \
                    flag in self._FLAG_SHORT2LONG_WITHARGS:
                # This is either -o or a multi-letter flag, and it is receiving its
                # value isolated.
                val = next(flagit, None)       # -o something
            elif flag.split('=', 1)[0] in self._FLAG_LONG2SHORT_WITHARGS or \
                    flag.split('=', 1)[0] in self._FLAG_SHORT2LONG_WITHARGS:
                # This is either -o or a multi-letter flag, and it is receiving its
                # value after an = sign.
                flag, val = flag.split('=', 1)    # -o=something
            # Some dependencies (e.g., BoostDependency) add unspaced "-isystem/usr/include" arguments
            elif flag.startswith('-isystem'):
                val = flag[8:].strip()
                flag = flag[:8]
            else:
                # This is a flag, and it's foreign to NVCC.
                #
                # We do not know whether this GCC-speak flag takes an isolated
                # argument. Assuming it does not (the vast majority indeed don't),
                # wrap this argument in an -Xcompiler flag and send it down to NVCC.
                if flag == '-ffast-math':
                    xflags.append('-use_fast_math')
                    xflags.append('-Xcompiler='+flag)
                elif flag == '-fno-fast-math':
                    xflags.append('-ftz=false')
                    xflags.append('-prec-div=true')
                    xflags.append('-prec-sqrt=true')
                    xflags.append('-Xcompiler='+flag)
                elif flag == '-freciprocal-math':
                    xflags.append('-prec-div=false')
                    xflags.append('-Xcompiler='+flag)
                elif flag == '-fno-reciprocal-math':
                    xflags.append('-prec-div=true')
                    xflags.append('-Xcompiler='+flag)
                else:
                    xflags.append('-Xcompiler='+self._shield_nvcc_list_arg(flag))
                    # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments.
                continue

            # Only trips if the flag list ended before the flag's argument.
            if val is None:
                raise EnvironmentException(f'Flag {flag!r} requires an argument, but none was given.')

            # Take care of the various NVCC-supported flags that need special handling.
            flag = self._FLAG_LONG2SHORT_WITHARGS.get(flag, flag)

            if flag in {'-include', '-isystem', '-I', '-L', '-l'}:
                # These flags are known to GCC, but list-valued in NVCC. They potentially
                # require double-quoting to prevent NVCC interpreting the flags as lists
                # when GCC would not have done so.
                #
                # We avoid doing this quoting for -D to avoid redefining macros and for
                # -U because it isn't possible to define a macro with a comma in the name.
                # -U with comma arguments is impossible in GCC-speak (and thus unambiguous
                # in NVCC-speak, albeit unportable).
                if len(flag) == 2:
                    xflags.append(flag+self._shield_nvcc_list_arg(val))
                elif flag == '-isystem' and val in self.host_compiler.get_default_include_dirs():
                    # like GnuLikeCompiler, we have to filter out include directories specified
                    # with -isystem that overlap with the host compiler's search path
                    pass
                else:
                    xflags.append(flag)
                    xflags.append(self._shield_nvcc_list_arg(val))
            elif flag == '-O':
                # Handle optimization levels GCC knows about that NVCC does not.
                if val == 'fast':
                    xflags.append('-O3')
                    xflags.append('-use_fast_math')
                    xflags.append('-Xcompiler')
                    xflags.append(flag+val)
                elif val in {'s', 'g', 'z'}:
                    xflags.append('-Xcompiler')
                    xflags.append(flag+val)
                else:
                    xflags.append(flag+val)
            elif flag in {'-D', '-U', '-m', '-t'}:
                xflags.append(flag+val)       # For style, keep glued.
            elif flag in {'-std'}:
                xflags.append(flag+'='+val)   # For style, keep glued.
            else:
                xflags.append(flag)
                xflags.append(val)

        return self._merge_flags(xflags)

    def needs_static_linker(self) -> bool:
        """Return False: this compiler does not request a static linker."""
        return False

    def thread_link_flags(self, environment: 'Environment') -> T.List[str]:
        """Return the host compiler's thread link flags, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.thread_link_flags(environment), _Phase.LINKER)

    def sanity_check(self, work_dir: str, env: 'Environment') -> None:
        """Compile and, when possible, run a small CUDA program in ``work_dir``.

        On success with empty stderr, the program's stdout is stored in
        ``self.detected_cc``.

        :raises EnvironmentException: if compilation fails or the resulting
            executable exits with a non-zero status.
        """
        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))

        sname = 'sanitycheckcuda.cu'
        code = r'''
        #include <cuda_runtime.h>
        #include <stdio.h>

        __global__ void kernel (void) {}

        int main(void){
            struct cudaDeviceProp prop;
            int count, i;
            cudaError_t ret = cudaGetDeviceCount(&count);
            if(ret != cudaSuccess){
                fprintf(stderr, "%d\n", (int)ret);
            }else{
                for(i=0;i<count;i++){
                    if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
                        fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
                    }
                }
            }
            fflush(stderr);
            fflush(stdout);
            return 0;
        }
        '''
        binname = sname.rsplit('.', 1)[0]
        binname += '_cross' if self.is_cross else ''
        source_name = os.path.join(work_dir, sname)
        binary_name = os.path.join(work_dir, binname + '.exe')
        with open(source_name, 'w', encoding='utf-8') as ofile:
            ofile.write(code)

        # The Sanity Test for CUDA language will serve as both a sanity test
        # and a native-build GPU architecture detection test, useful later.
        #
        # For this second purpose, NVCC has very handy flags, --run and
        # --run-args, that allow one to run an application with the
        # environment set up properly. Of course, this only works for native
        # builds; For cross builds we must still use the exe_wrapper (if any).
        self.detected_cc = ''
        flags = []

        # Disable warnings, compile with statically-linked runtime for minimum
        # reliance on the system.
        flags += ['-w', '-cudart', 'static', source_name]

        # Use the -ccbin option, if available, even during sanity checking.
        # Otherwise, on systems where CUDA does not support the default compiler,
        # NVCC becomes unusable.
        flags += self.get_ccbin_args(env.coredata.options)

        # If cross-compiling, we can't run the sanity check, only compile it.
        if self.is_cross and self.exe_wrapper is None:
            # Linking cross built apps is painful. You can't really
            # tell if you should use -nostdlib or not and for example
            # on OSX the compiler binary is the same but you need
            # a ton of compiler flags to differentiate between
            # arm and x86_64. So just compile.
            flags += self.get_compile_only_args()
        flags += self.get_output_args(binary_name)

        # Compile sanity check
        cmdlist = self.exelist + flags
        mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
        mlog.debug('Sanity check compile stdout: ')
        mlog.debug(stdo)
        mlog.debug('-----\nSanity check compile stderr:')
        mlog.debug(stde)
        mlog.debug('-----')
        if pc.returncode != 0:
            raise EnvironmentException(f'Compiler {self.name_string()} can not compile programs.')

        # Run sanity check (if possible)
        if self.is_cross:
            if self.exe_wrapper is None:
                return
            else:
                cmdlist = self.exe_wrapper.get_command() + [binary_name]
        else:
            cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
        mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
        pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
        mlog.debug('Sanity check run stdout: ')
        mlog.debug(stdo)
        mlog.debug('-----\nSanity check run stderr:')
        mlog.debug(stde)
        mlog.debug('-----')
        pe.wait()
        if pe.returncode != 0:
            raise EnvironmentException(f'Executables created by {self.language} compiler {self.name_string()} are not runnable.')

        # Interpret the result of the sanity test.
        # As mentioned above, it is not only a sanity test but also a GPU
        # architecture detection test.
        if stde == '':
            self.detected_cc = stdo
        else:
            mlog.debug('cudaGetDeviceCount() returned ' + stde)

    def has_header_symbol(self, hname: str, symbol: str, prefix: str,
                          env: 'Environment', *,
                          extra_args: T.Union[None, T.List[str], T.Callable[[CompileCheckMode], T.List[str]]] = None,
                          dependencies: T.Optional[T.List['Dependency']] = None) -> T.Tuple[bool, bool]:
        """Check whether ``symbol`` is available after including ``hname``.

        Tries a C-like use of the symbol first, then a ``using`` declaration.

        :return: the ``(found, cached)`` pair produced by ``self.compiles``.
        """
        if extra_args is None:
            extra_args = []
        fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol}
        # Check if it's a C-like symbol
        t = '''{prefix}
        #include <{header}>
        int main(void) {{
            /* If it's not defined as a macro, try to use as a symbol */
            #ifndef {symbol}
                {symbol};
            #endif
            return 0;
        }}'''
        found, cached = self.compiles(t.format_map(fargs), env, extra_args=extra_args, dependencies=dependencies)
        if found:
            return True, cached
        # Check if it's a class or a template
        t = '''{prefix}
        #include <{header}>
        using {symbol};
        int main(void) {{
            return 0;
        }}'''
        return self.compiles(t.format_map(fargs), env, extra_args=extra_args, dependencies=dependencies)

    def get_options(self) -> 'KeyedOptionDictType':
        """Return the base options extended with the CUDA ``std`` and ``ccbindir`` options."""
        opts = super().get_options()
        std_key = OptionKey('std', machine=self.for_machine, lang=self.language)
        ccbindir_key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
        opts.update({
            std_key: coredata.UserComboOption('C++ language standard to use with CUDA',
                                              ['none', 'c++03', 'c++11', 'c++14', 'c++17'], 'none'),
            ccbindir_key: coredata.UserStringOption('CUDA non-default toolchain directory to use (-ccbin)',
                                                    ''),
        })
        return opts

    def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOptionDictType':
        """
        Convert an NVCC Option set to a host compiler's option set.
        """

        # We must strip the -std option from the host compiler option set, as NVCC has
        # its own -std flag that may not agree with the host compiler's.
        host_options = {key: options.get(key, opt) for key, opt in self.host_compiler.get_options().items()}
        std_key = OptionKey('std', machine=self.for_machine, lang=self.host_compiler.language)
        overrides = {std_key: 'none'}
        return OptionOverrideProxy(overrides, host_options)

    def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
        """Return -ccbin, --std (non-Windows only) and translated host option compile args."""
        args = self.get_ccbin_args(options)
        # On Windows, the version of the C++ standard used by nvcc is dictated by
        # the combination of CUDA version and MSVC version; the --std= is thus ignored
        # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
        if not is_windows():
            key = OptionKey('std', machine=self.for_machine, lang=self.language)
            std = options[key]
            if std.value != 'none':
                args.append('--std=' + std.value)

        return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options)))

    def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
        """Return -ccbin plus the host compiler's option link args, translated for NVCC."""
        args = self.get_ccbin_args(options)
        return args + self._to_host_flags(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)), _Phase.LINKER)

    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
                        suffix: str, soversion: str,
                        darwin_versions: T.Tuple[str, str]) -> T.List[str]:
        """Return the host compiler's soname args, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_soname_args(
            env, prefix, shlib_name, suffix, soversion, darwin_versions), _Phase.LINKER)

    def get_compile_only_args(self) -> T.List[str]:
        """Return the flag that stops after compilation (``-c``)."""
        return ['-c']

    def get_no_optimization_args(self) -> T.List[str]:
        """Return the flag that disables optimization (``-O0``)."""
        return ['-O0']

    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
        """Look up the flags for ``optimization_level`` in ``cuda_optimization_args``."""
        # alternatively, consider simply redirecting this to the host compiler, which would
        # give us more control over options like "optimize for space" (which nvcc doesn't support):
        # return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level))
        return cuda_optimization_args[optimization_level]

    def sanitizer_compile_args(self, value: str) -> T.List[str]:
        """Return the host compiler's sanitizer compile args, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.sanitizer_compile_args(value))

    def sanitizer_link_args(self, value: str) -> T.List[str]:
        """Return the host compiler's sanitizer link args, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.sanitizer_link_args(value))

    def get_debug_args(self, is_debug: bool) -> T.List[str]:
        """Look up the flags for ``is_debug`` in ``cuda_debug_args``."""
        return cuda_debug_args[is_debug]

    def get_werror_args(self) -> T.List[str]:
        """Return the NVCC flag that turns the listed warning kinds into errors."""
        return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']

    def get_warn_args(self, level: str) -> T.List[str]:
        """Return the pre-translated host warning flags for ``level``."""
        return self.warn_args[level]

    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
        """Return CUDA buildtype flags followed by the translated host buildtype flags."""
        # nvcc doesn't support msvc's "Edit and Continue" PDB format; "downgrade" to
        # a regular PDB to avoid cl's warning to that effect (D9025 : overriding '/ZI' with '/Zi')
        host_args = ['/Zi' if arg == '/ZI' else arg for arg in self.host_compiler.get_buildtype_args(buildtype)]
        return cuda_buildtype_args[buildtype] + self._to_host_flags(host_args)

    def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
        """Return the include flag for ``path``; an empty path means ``.``."""
        if path == '':
            path = '.'
        return ['-isystem=' + path] if is_system else ['-I' + path]

    def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
        """Return the host compiler's compile debug-file args, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, pch))

    def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
        """Return the host compiler's link debug-file args, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_link_debugfile_args(targetfile), _Phase.LINKER)

    def get_depfile_suffix(self) -> str:
        """Return the dependency-file suffix (``d``)."""
        return 'd'

    def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]:
        """Return the host compiler's buildtype linker args, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_buildtype_linker_args(buildtype), _Phase.LINKER)

    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
                         rpath_paths: str, build_rpath: str,
                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
        """Return the host compiler's rpath args (translated) and its rpath dirs to remove (unchanged)."""
        (rpath_args, rpath_dirs_to_remove) = self.host_compiler.build_rpath_args(
            env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
        return (self._to_host_flags(rpath_args, _Phase.LINKER), rpath_dirs_to_remove)

    def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]:
        """Return ``args`` unchanged."""
        return args

    def get_pic_args(self) -> T.List[str]:
        """Return the host compiler's PIC args, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_pic_args())

    def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str],
                                               build_dir: str) -> T.List[str]:
        """Return an empty list; the inputs are not used."""
        return []

    def get_output_args(self, target: str) -> T.List[str]:
        """Return the flags that name the output file."""
        return ['-o', target]

    def get_std_exe_link_args(self) -> T.List[str]:
        """Return the host compiler's standard executable link args, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_std_exe_link_args(), _Phase.LINKER)

    def find_library(self, libname: str, env: 'Environment', extra_dirs: T.List[str],
                     libtype: LibType = LibType.PREFER_SHARED) -> T.Optional[T.List[str]]:
        """Return ``-l<libname>`` without searching; ``env``, ``extra_dirs`` and ``libtype`` are unused."""
        return ['-l' + libname]  # FIXME

    def get_crt_compile_args(self, crt_val: str, buildtype: str) -> T.List[str]:
        """Return the host compiler's CRT compile args, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_crt_compile_args(crt_val, buildtype))

    def get_crt_link_args(self, crt_val: str, buildtype: str) -> T.List[str]:
        """Return the host compiler's CRT link args, translated for NVCC.

        ``/NODEFAULTLIB:LIBCMT.lib`` is prepended when the host CRT compile
        args select ``/MDd``, ``/MD`` or ``/MTd``.
        """
        # nvcc defaults to static, release version of msvc runtime and provides no
        # native option to override it; override it with /NODEFAULTLIB
        host_link_arg_overrides = []
        host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
        if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args):
            host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib']
        return self._to_host_flags(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype), _Phase.LINKER)

    def get_target_link_args(self, target: 'BuildTarget') -> T.List[str]:
        """Return the base class's target link args, translated for NVCC."""
        return self._to_host_flags(super().get_target_link_args(target), _Phase.LINKER)

    def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]:
        """Return the base class's dependency compile args, translated for NVCC."""
        return self._to_host_flags(super().get_dependency_compile_args(dep))

    def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]:
        """Return the base class's dependency link args, translated for NVCC."""
        return self._to_host_flags(super().get_dependency_link_args(dep), _Phase.LINKER)

    def get_ccbin_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
        """Return a shielded ``-ccbin=<dir>`` flag, or ``[]`` if the ``ccbindir`` option is empty."""
        key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
        ccbindir = options[key].value
        if isinstance(ccbindir, str) and ccbindir != '':
            return [self._shield_nvcc_list_arg('-ccbin='+ccbindir, False)]
        else:
            return []

    def get_profile_generate_args(self) -> T.List[str]:
        """Return the host compiler's profile-generate args, each prefixed with ``-Xcompiler=``."""
        return ['-Xcompiler=' + x for x in self.host_compiler.get_profile_generate_args()]

    def get_profile_use_args(self) -> T.List[str]:
        """Return the host compiler's profile-use args, each prefixed with ``-Xcompiler=``."""
        return ['-Xcompiler=' + x for x in self.host_compiler.get_profile_use_args()]

    def get_disable_assert_args(self) -> T.List[str]:
        """Return the host compiler's disable-assert args unchanged."""
        return self.host_compiler.get_disable_assert_args()