1# Copyright 2012-2017 The Meson development team
2
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6
7#     http://www.apache.org/licenses/LICENSE-2.0
8
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import enum
16import os.path
17import string
18import typing as T
19
20from .. import coredata
21from .. import mlog
22from ..mesonlib import (
23    EnvironmentException, MachineChoice, Popen_safe, OptionOverrideProxy,
24    is_windows, LibType, OptionKey,
25)
26from .compilers import (Compiler, cuda_buildtype_args, cuda_optimization_args,
27                        cuda_debug_args, CompileCheckMode)
28
29if T.TYPE_CHECKING:
30    from ..build import BuildTarget
31    from ..coredata import KeyedOptionDictType
32    from ..dependencies import Dependency
33    from ..environment import Environment  # noqa: F401
34    from ..envconfig import MachineInfo
35    from ..linkers import DynamicLinker
36    from ..programs import ExternalProgram
37
38
class _Phase(enum.Enum):
    """Host tool phase that a forwarded flag is aimed at.

    The member value is interpolated into an ``-X<value>=`` option by
    ``CudaCompiler._to_host_flags`` (giving ``-Xcompiler=`` or ``-Xlinker=``).
    """

    COMPILER = 'compiler'
    LINKER = 'linker'
43
44
45class CudaCompiler(Compiler):
46
    # '-Xlinker' is the short spelling of NVCC's '--linker-options' (see the
    # long-to-short table in this class); the trailing '=' glues the value on.
    LINKER_PREFIX = '-Xlinker='
    # Language identifier of this compiler class.
    language = 'cuda'
49
    # NVCC flags taking no arguments.
    # _to_host_flags() forwards any flag found in this set to NVCC unchanged.
    # Each row lists the --long spelling and, where one exists, the -short
    # spelling, followed by the section of the CUDA Toolkit reference that
    # documents the option.
    # NOTE: '--allow-unsupported-compiler' is listed in two rows (4.2.1.14 and
    # 4.2.5.18); this is harmless because duplicates collapse in a set.
    _FLAG_PASSTHRU_NOARGS = {
        # NVCC --long-option,                   NVCC -short-option              CUDA Toolkit 11.2.1 Reference
        '--objdir-as-tempdir',                  '-objtemp',                     # 4.2.1.2
        '--generate-dependency-targets',        '-MP',                          # 4.2.1.12
        '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.1.14
        '--link',                                                               # 4.2.2.1
        '--lib',                                '-lib',                         # 4.2.2.2
        '--device-link',                        '-dlink',                       # 4.2.2.3
        '--device-c',                           '-dc',                          # 4.2.2.4
        '--device-w',                           '-dw',                          # 4.2.2.5
        '--cuda',                               '-cuda',                        # 4.2.2.6
        '--compile',                            '-c',                           # 4.2.2.7
        '--fatbin',                             '-fatbin',                      # 4.2.2.8
        '--cubin',                              '-cubin',                       # 4.2.2.9
        '--ptx',                                '-ptx',                         # 4.2.2.10
        '--preprocess',                         '-E',                           # 4.2.2.11
        '--generate-dependencies',              '-M',                           # 4.2.2.12
        '--generate-nonsystem-dependencies',    '-MM',                          # 4.2.2.13
        '--generate-dependencies-with-compile', '-MD',                          # 4.2.2.14
        '--generate-nonsystem-dependencies-with-compile', '-MMD',               # 4.2.2.15
        '--run',                                                                # 4.2.2.16
        '--profile',                            '-pg',                          # 4.2.3.1
        '--debug',                              '-g',                           # 4.2.3.2
        '--device-debug',                       '-G',                           # 4.2.3.3
        '--extensible-whole-program',           '-ewp',                         # 4.2.3.4
        '--generate-line-info',                 '-lineinfo',                    # 4.2.3.5
        '--dlink-time-opt',                     '-dlto',                        # 4.2.3.8
        '--no-exceptions',                      '-noeh',                        # 4.2.3.11
        '--shared',                             '-shared',                      # 4.2.3.12
        '--no-host-device-initializer-list',    '-nohdinitlist',                # 4.2.3.15
        '--expt-relaxed-constexpr',             '-expt-relaxed-constexpr',      # 4.2.3.16
        '--extended-lambda',                    '-extended-lambda',             # 4.2.3.17
        '--expt-extended-lambda',               '-expt-extended-lambda',        # 4.2.3.18
        '--m32',                                '-m32',                         # 4.2.3.20
        '--m64',                                '-m64',                         # 4.2.3.21
        '--forward-unknown-to-host-compiler',   '-forward-unknown-to-host-compiler', # 4.2.5.1
        '--forward-unknown-to-host-linker',     '-forward-unknown-to-host-linker',   # 4.2.5.2
        '--dont-use-profile',                   '-noprof',                      # 4.2.5.3
        '--dryrun',                             '-dryrun',                      # 4.2.5.5
        '--verbose',                            '-v',                           # 4.2.5.6
        '--keep',                               '-keep',                        # 4.2.5.7
        '--save-temps',                         '-save-temps',                  # 4.2.5.9
        '--clean-targets',                      '-clean',                       # 4.2.5.10
        '--no-align-double',                                                    # 4.2.5.16
        '--no-device-link',                     '-nodlink',                     # 4.2.5.17
        '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.5.18
        '--use_fast_math',                      '-use_fast_math',               # 4.2.7.7
        '--extra-device-vectorization',         '-extra-device-vectorization',  # 4.2.7.12
        '--compile-as-tools-patch',             '-astoolspatch',                # 4.2.7.13
        '--keep-device-functions',              '-keep-device-functions',       # 4.2.7.14
        '--disable-warnings',                   '-w',                           # 4.2.8.1
        '--source-in-ptx',                      '-src-in-ptx',                  # 4.2.8.2
        '--restrict',                           '-restrict',                    # 4.2.8.3
        '--Wno-deprecated-gpu-targets',         '-Wno-deprecated-gpu-targets',  # 4.2.8.4
        '--Wno-deprecated-declarations',        '-Wno-deprecated-declarations', # 4.2.8.5
        '--Wreorder',                           '-Wreorder',                    # 4.2.8.6
        '--Wdefault-stream-launch',             '-Wdefault-stream-launch',      # 4.2.8.7
        '--Wext-lambda-captures-this',          '-Wext-lambda-captures-this',   # 4.2.8.8
        '--display-error-number',               '-err-no',                      # 4.2.8.10
        '--resource-usage',                     '-res-usage',                   # 4.2.8.14
        '--help',                               '-h',                           # 4.2.8.15
        '--version',                            '-V',                           # 4.2.8.16
        '--list-gpu-code',                      '-code-ls',                     # 4.2.8.20
        '--list-gpu-arch',                      '-arch-ls',                     # 4.2.8.21
    }
    # Dictionary of NVCC flags taking either one argument or a comma-separated list.
    # Maps --long to -short options, because the short options are more GCC-like.
    # _to_host_flags() uses this table to normalise a recognised --long spelling
    # to its -short counterpart before deciding how to emit the flag. The trailing
    # comment on each row is the section of the CUDA Toolkit reference that
    # documents the option.
    _FLAG_LONG2SHORT_WITHARGS = {
        '--output-file':                        '-o',                           # 4.2.1.1
        '--pre-include':                        '-include',                     # 4.2.1.3
        '--library':                            '-l',                           # 4.2.1.4
        '--define-macro':                       '-D',                           # 4.2.1.5
        '--undefine-macro':                     '-U',                           # 4.2.1.6
        '--include-path':                       '-I',                           # 4.2.1.7
        '--system-include':                     '-isystem',                     # 4.2.1.8
        '--library-path':                       '-L',                           # 4.2.1.9
        '--output-directory':                   '-odir',                        # 4.2.1.10
        '--dependency-output':                  '-MF',                          # 4.2.1.11
        '--compiler-bindir':                    '-ccbin',                       # 4.2.1.13
        '--archiver-binary':                    '-arbin',                       # 4.2.1.15
        '--cudart':                             '-cudart',                      # 4.2.1.16
        '--cudadevrt':                          '-cudadevrt',                   # 4.2.1.17
        '--libdevice-directory':                '-ldir',                        # 4.2.1.18
        '--target-directory':                   '-target-dir',                  # 4.2.1.19
        '--optimization-info':                  '-opt-info',                    # 4.2.3.6
        '--optimize':                           '-O',                           # 4.2.3.7
        '--ftemplate-backtrace-limit':          '-ftemplate-backtrace-limit',   # 4.2.3.9
        '--ftemplate-depth':                    '-ftemplate-depth',             # 4.2.3.10
        '--x':                                  '-x',                           # 4.2.3.13
        '--std':                                '-std',                         # 4.2.3.14
        '--machine':                            '-m',                           # 4.2.3.19
        '--compiler-options':                   '-Xcompiler',                   # 4.2.4.1
        '--linker-options':                     '-Xlinker',                     # 4.2.4.2
        '--archive-options':                    '-Xarchive',                    # 4.2.4.3
        '--ptxas-options':                      '-Xptxas',                      # 4.2.4.4
        '--nvlink-options':                     '-Xnvlink',                     # 4.2.4.5
        '--threads':                            '-t',                           # 4.2.5.4
        '--keep-dir':                           '-keep-dir',                    # 4.2.5.8
        '--run-args':                           '-run-args',                    # 4.2.5.11
        '--input-drive-prefix':                 '-idp',                         # 4.2.5.12
        '--dependency-drive-prefix':            '-ddp',                         # 4.2.5.13
        '--drive-prefix':                       '-dp',                          # 4.2.5.14
        '--dependency-target-name':             '-MT',                          # 4.2.5.15
        '--default-stream':                     '-default-stream',              # 4.2.6.1
        '--gpu-architecture':                   '-arch',                        # 4.2.7.1
        '--gpu-code':                           '-code',                        # 4.2.7.2
        '--generate-code':                      '-gencode',                     # 4.2.7.3
        '--relocatable-device-code':            '-rdc',                         # 4.2.7.4
        '--entries':                            '-e',                           # 4.2.7.5
        '--maxrregcount':                       '-maxrregcount',                # 4.2.7.6
        '--ftz':                                '-ftz',                         # 4.2.7.8
        '--prec-div':                           '-prec-div',                    # 4.2.7.9
        '--prec-sqrt':                          '-prec-sqrt',                   # 4.2.7.10
        '--fmad':                               '-fmad',                        # 4.2.7.11
        '--Werror':                             '-Werror',                      # 4.2.8.9
        '--diag-error':                         '-diag-error',                  # 4.2.8.11
        '--diag-suppress':                      '-diag-suppress',               # 4.2.8.12
        '--diag-warn':                          '-diag-warn',                   # 4.2.8.13
        '--options-file':                       '-optf',                        # 4.2.8.17
        '--time':                               '-time',                        # 4.2.8.18
        '--qpp-config':                         '-qpp-config',                  # 4.2.8.19
    }
173    # Reverse map -short to --long options.
174    _FLAG_SHORT2LONG_WITHARGS = {v: k for k, v in _FLAG_LONG2SHORT_WITHARGS.items()}
175
176    def __init__(self, exelist: T.List[str], version: str, for_machine: MachineChoice,
177                 is_cross: bool, exe_wrapper: T.Optional['ExternalProgram'],
178                 host_compiler: Compiler, info: 'MachineInfo',
179                 linker: T.Optional['DynamicLinker'] = None,
180                 full_version: T.Optional[str] = None):
181        super().__init__(exelist, version, for_machine, info, linker=linker, full_version=full_version, is_cross=is_cross)
182        self.exe_wrapper = exe_wrapper
183        self.host_compiler = host_compiler
184        self.base_options = host_compiler.base_options
185        self.id = 'nvcc'
186        self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()}
187
188    @classmethod
189    def _shield_nvcc_list_arg(cls, arg: str, listmode: bool = True) -> str:
190        r"""
191        Shield an argument against both splitting by NVCC's list-argument
192        parse logic, and interpretation by any shell.
193
194        NVCC seems to consider every comma , that is neither escaped by \ nor inside
195        a double-quoted string a split-point. Single-quotes do not provide protection
196        against splitting; In fact, after splitting they are \-escaped. Unfortunately,
197        double-quotes don't protect against shell expansion. What follows is a
198        complex dance to accommodate everybody.
199        """
200
201        SQ = "'"
202        DQ = '"'
203        CM = ","
204        BS = "\\"
205        DQSQ = DQ+SQ+DQ
206        quotable = set(string.whitespace+'"$`\\')
207
208        if CM not in arg or not listmode:
209            if SQ not in arg:
210                # If any of the special characters "$`\ or whitespace are present, single-quote.
211                # Otherwise return bare.
212                if set(arg).intersection(quotable):
213                    return SQ+arg+SQ
214                else:
215                    return arg # Easy case: no splits, no quoting.
216            else:
217                # There are single quotes. Double-quote them, and single-quote the
218                # strings between them.
219                l = [cls._shield_nvcc_list_arg(s) for s in arg.split(SQ)]
220                l = sum([[s, DQSQ] for s in l][:-1], [])  # Interleave l with DQSQs
221                return ''.join(l)
222        else:
223            # A comma is present, and list mode was active.
224            # We apply (what we guess is) the (primitive) NVCC splitting rule:
225            l = ['']
226            instring = False
227            argit = iter(arg)
228            for c in argit:
229                if   c == CM and not instring:
230                    l.append('')
231                elif c == DQ:
232                    l[-1] += c
233                    instring = not instring
234                elif c == BS:
235                    try:
236                        l[-1] += next(argit)
237                    except StopIteration:
238                        break
239                else:
240                    l[-1] += c
241
242            # Shield individual strings, without listmode, then return them with
243            # escaped commas between them.
244            l = [cls._shield_nvcc_list_arg(s, listmode=False) for s in l]
245            return r'\,'.join(l)
246
247    @classmethod
248    def _merge_flags(cls, flags: T.List[str]) -> T.List[str]:
249        r"""
250        The flags to NVCC gets exceedingly verbose and unreadable when too many of them
251        are shielded with -Xcompiler. Merge consecutive -Xcompiler-wrapped arguments
252        into one.
253        """
254        if len(flags) <= 1:
255            return flags
256        flagit = iter(flags)
257        xflags = []
258
259        def is_xcompiler_flag_isolated(flag: str) -> bool:
260            return flag == '-Xcompiler'
261        def is_xcompiler_flag_glued(flag: str) -> bool:
262            return flag.startswith('-Xcompiler=')
263        def is_xcompiler_flag(flag: str) -> bool:
264            return is_xcompiler_flag_isolated(flag) or is_xcompiler_flag_glued(flag)
265        def get_xcompiler_val(flag: str, flagit: T.Iterator[str]) -> str:
266            if is_xcompiler_flag_glued(flag):
267                return flag[len('-Xcompiler='):]
268            else:
269                try:
270                    return next(flagit)
271                except StopIteration:
272                    return ""
273
274        ingroup = False
275        for flag in flagit:
276            if not is_xcompiler_flag(flag):
277                ingroup = False
278                xflags.append(flag)
279            elif ingroup:
280                xflags[-1] += ','
281                xflags[-1] += get_xcompiler_val(flag, flagit)
282            elif is_xcompiler_flag_isolated(flag):
283                ingroup = True
284                xflags.append(flag)
285                xflags.append(get_xcompiler_val(flag, flagit))
286            elif is_xcompiler_flag_glued(flag):
287                ingroup = True
288                xflags.append(flag)
289            else:
290                raise ValueError("-Xcompiler flag merging failed, unknown argument form!")
291        return xflags
292
293    def _to_host_flags(self, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
294        """
295        Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags.
296
297        NVCC's "short" flags have broad similarities to the GCC standard, but have
298        gratuitous, irritating differences.
299        """
300
301        xflags = []
302        flagit = iter(flags)
303
304        for flag in flagit:
305            # The CUDA Toolkit Documentation, in 4.1. Command Option Types and Notation,
306            # specifies that NVCC does not parse the standard flags as GCC does. It has
307            # its own strategy, to wit:
308            #
309            #     nvcc recognizes three types of command options: boolean options, single
310            #     value options, and list options.
311            #
312            #     Boolean options do not have an argument; they are either specified on a
313            #     command line or not. Single value options must be specified at most once,
314            #     and list options may be repeated. Examples of each of these option types
315            #     are, respectively: --verbose (switch to verbose mode), --output-file
316            #     (specify output file), and --include-path (specify include path).
317            #
318            #     Single value options and list options must have arguments, which must
319            #     follow the name of the option itself by either one of more spaces or an
320            #     equals character. When a one-character short name such as -I, -l, and -L
321            #     is used, the value of the option may also immediately follow the option
322            #     itself without being separated by spaces or an equal character. The
323            #     individual values of list options may be separated by commas in a single
324            #     instance of the option, or the option may be repeated, or any
325            #     combination of these two cases.
326            #
327            # One strange consequence of this choice is that directory and filenames that
328            # contain commas (',') cannot be passed to NVCC (at least, not as easily as
329            # in GCC). Another strange consequence is that it is legal to supply flags
330            # such as
331            #
332            #     -lpthread,rt,dl,util
333            #     -l pthread,rt,dl,util
334            #     -l=pthread,rt,dl,util
335            #
336            # and each of the above alternatives is equivalent to GCC-speak
337            #
338            #     -lpthread -lrt -ldl -lutil
339            #     -l pthread -l rt -l dl -l util
340            #     -l=pthread -l=rt -l=dl -l=util
341            #
342            # *With the exception of commas in the name*, GCC-speak for these list flags
343            # is a strict subset of NVCC-speak, so we passthrough those flags.
344            #
345            # The -D macro-define flag is documented as somehow shielding commas from
346            # splitting a definition. Balanced parentheses, braces and single-quotes
347            # around the comma are not sufficient, but balanced double-quotes are. The
348            # shielding appears to work with -l, -I, -L flags as well, for instance.
349            #
350            # Since our goal is to replicate GCC-speak as much as possible, we check for
351            # commas in all list-arguments and shield them with double-quotes. We make
352            # an exception for -D (where this would be value-changing) and -U (because
353            # it isn't possible to define a macro with a comma in the name).
354
355            if flag in self._FLAG_PASSTHRU_NOARGS:
356                xflags.append(flag)
357                continue
358
359            # Handle breakup of flag-values into a flag-part and value-part.
360            if   flag[:1] not in '-/':
361                # This is not a flag. It's probably a file input. Pass it through.
362                xflags.append(flag)
363                continue
364            elif flag[:1] == '/':
365                # This is ambiguously either an MVSC-style /switch or an absolute path
366                # to a file. For some magical reason the following works acceptably in
367                # both cases.
368                wrap = '"' if ',' in flag else ''
369                xflags.append(f'-X{phase.value}={wrap}{flag}{wrap}')
370                continue
371            elif len(flag) >= 2 and flag[0] == '-' and flag[1] in 'IDULlmOxmte':
372                # This is a single-letter short option. These options (with the
373                # exception of -o) are allowed to receive their argument with neither
374                # space nor = sign before them. Detect and separate them in that event.
375                if   flag[2:3] == '':            # -I something
376                    try:
377                        val = next(flagit)
378                    except StopIteration:
379                        pass
380                elif flag[2:3] == '=':           # -I=something
381                    val = flag[3:]
382                else:                            # -Isomething
383                    val = flag[2:]
384                flag = flag[:2]                  # -I
385            elif flag in self._FLAG_LONG2SHORT_WITHARGS or \
386                 flag in self._FLAG_SHORT2LONG_WITHARGS:
387                # This is either -o or a multi-letter flag, and it is receiving its
388                # value isolated.
389                try:
390                    val = next(flagit)           # -o something
391                except StopIteration:
392                    pass
393            elif flag.split('=', 1)[0] in self._FLAG_LONG2SHORT_WITHARGS or \
394                 flag.split('=', 1)[0] in self._FLAG_SHORT2LONG_WITHARGS:
395                # This is either -o or a multi-letter flag, and it is receiving its
396                # value after an = sign.
397                flag, val = flag.split('=', 1)    # -o=something
398            # Some dependencies (e.g., BoostDependency) add unspaced "-isystem/usr/include" arguments
399            elif flag.startswith('-isystem'):
400                val = flag[8:].strip()
401                flag = flag[:8]
402            else:
403                # This is a flag, and it's foreign to NVCC.
404                #
405                # We do not know whether this GCC-speak flag takes an isolated
406                # argument. Assuming it does not (the vast majority indeed don't),
407                # wrap this argument in an -Xcompiler flag and send it down to NVCC.
408                if   flag == '-ffast-math':
409                    xflags.append('-use_fast_math')
410                    xflags.append('-Xcompiler='+flag)
411                elif flag == '-fno-fast-math':
412                    xflags.append('-ftz=false')
413                    xflags.append('-prec-div=true')
414                    xflags.append('-prec-sqrt=true')
415                    xflags.append('-Xcompiler='+flag)
416                elif flag == '-freciprocal-math':
417                    xflags.append('-prec-div=false')
418                    xflags.append('-Xcompiler='+flag)
419                elif flag == '-fno-reciprocal-math':
420                    xflags.append('-prec-div=true')
421                    xflags.append('-Xcompiler='+flag)
422                else:
423                    xflags.append('-Xcompiler='+self._shield_nvcc_list_arg(flag))
424                    # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments.
425                continue
426
427            assert val is not None  # Should only trip if there is a missing argument.
428
429            # Take care of the various NVCC-supported flags that need special handling.
430            flag = self._FLAG_LONG2SHORT_WITHARGS.get(flag, flag)
431
432            if   flag in {'-include', '-isystem', '-I', '-L', '-l'}:
433                # These flags are known to GCC, but list-valued in NVCC. They potentially
434                # require double-quoting to prevent NVCC interpreting the flags as lists
435                # when GCC would not have done so.
436                #
437                # We avoid doing this quoting for -D to avoid redefining macros and for
438                # -U because it isn't possible to define a macro with a comma in the name.
439                # -U with comma arguments is impossible in GCC-speak (and thus unambiguous
440                #in NVCC-speak, albeit unportable).
441                if len(flag) == 2:
442                    xflags.append(flag+self._shield_nvcc_list_arg(val))
443                elif flag == '-isystem' and val in self.host_compiler.get_default_include_dirs():
444                    # like GnuLikeCompiler, we have to filter out include directories specified
445                    # with -isystem that overlap with the host compiler's search path
446                    pass
447                else:
448                    xflags.append(flag)
449                    xflags.append(self._shield_nvcc_list_arg(val))
450            elif flag == '-O':
451                # Handle optimization levels GCC knows about that NVCC does not.
452                if   val == 'fast':
453                    xflags.append('-O3')
454                    xflags.append('-use_fast_math')
455                    xflags.append('-Xcompiler')
456                    xflags.append(flag+val)
457                elif val in {'s', 'g', 'z'}:
458                    xflags.append('-Xcompiler')
459                    xflags.append(flag+val)
460                else:
461                    xflags.append(flag+val)
462            elif flag in {'-D', '-U', '-m', '-t'}:
463                xflags.append(flag+val)       # For style, keep glued.
464            elif flag in {'-std'}:
465                xflags.append(flag+'='+val)   # For style, keep glued.
466            else:
467                xflags.append(flag)
468                xflags.append(val)
469
470        return self._merge_flags(xflags)
471
    def needs_static_linker(self) -> bool:
        """Return ``False`` unconditionally.

        NOTE(review): presumably because NVCC can create static libraries
        itself (cf. ``--lib``); confirm against the callers.
        """
        return False
474
475    def thread_link_flags(self, environment: 'Environment') -> T.List[str]:
476        return self._to_host_flags(self.host_compiler.thread_link_flags(environment), _Phase.LINKER)
477
478    def sanity_check(self, work_dir: str, env: 'Environment') -> None:
479        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
480        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
481
482        sname = 'sanitycheckcuda.cu'
483        code = r'''
484        #include <cuda_runtime.h>
485        #include <stdio.h>
486
487        __global__ void kernel (void) {}
488
489        int main(void){
490            struct cudaDeviceProp prop;
491            int count, i;
492            cudaError_t ret = cudaGetDeviceCount(&count);
493            if(ret != cudaSuccess){
494                fprintf(stderr, "%d\n", (int)ret);
495            }else{
496                for(i=0;i<count;i++){
497                    if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
498                        fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
499                    }
500                }
501            }
502            fflush(stderr);
503            fflush(stdout);
504            return 0;
505        }
506        '''
507        binname = sname.rsplit('.', 1)[0]
508        binname += '_cross' if self.is_cross else ''
509        source_name = os.path.join(work_dir, sname)
510        binary_name = os.path.join(work_dir, binname + '.exe')
511        with open(source_name, 'w', encoding='utf-8') as ofile:
512            ofile.write(code)
513
514        # The Sanity Test for CUDA language will serve as both a sanity test
515        # and a native-build GPU architecture detection test, useful later.
516        #
517        # For this second purpose, NVCC has very handy flags, --run and
518        # --run-args, that allow one to run an application with the
519        # environment set up properly. Of course, this only works for native
520        # builds; For cross builds we must still use the exe_wrapper (if any).
521        self.detected_cc = ''
522        flags = []
523
524        # Disable warnings, compile with statically-linked runtime for minimum
525        # reliance on the system.
526        flags += ['-w', '-cudart', 'static', source_name]
527
528        # Use the -ccbin option, if available, even during sanity checking.
529        # Otherwise, on systems where CUDA does not support the default compiler,
530        # NVCC becomes unusable.
531        flags += self.get_ccbin_args(env.coredata.options)
532
533        # If cross-compiling, we can't run the sanity check, only compile it.
534        if self.is_cross and self.exe_wrapper is None:
535            # Linking cross built apps is painful. You can't really
536            # tell if you should use -nostdlib or not and for example
537            # on OSX the compiler binary is the same but you need
538            # a ton of compiler flags to differentiate between
539            # arm and x86_64. So just compile.
540            flags += self.get_compile_only_args()
541        flags += self.get_output_args(binary_name)
542
543        # Compile sanity check
544        cmdlist = self.exelist + flags
545        mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
546        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
547        mlog.debug('Sanity check compile stdout: ')
548        mlog.debug(stdo)
549        mlog.debug('-----\nSanity check compile stderr:')
550        mlog.debug(stde)
551        mlog.debug('-----')
552        if pc.returncode != 0:
553            raise EnvironmentException(f'Compiler {self.name_string()} can not compile programs.')
554
555        # Run sanity check (if possible)
556        if self.is_cross:
557            if self.exe_wrapper is None:
558                return
559            else:
560                cmdlist = self.exe_wrapper.get_command() + [binary_name]
561        else:
562            cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
563        mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
564        pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
565        mlog.debug('Sanity check run stdout: ')
566        mlog.debug(stdo)
567        mlog.debug('-----\nSanity check run stderr:')
568        mlog.debug(stde)
569        mlog.debug('-----')
570        pe.wait()
571        if pe.returncode != 0:
572            raise EnvironmentException(f'Executables created by {self.language} compiler {self.name_string()} are not runnable.')
573
574        # Interpret the result of the sanity test.
575        # As mentioned above, it is not only a sanity test but also a GPU
576        # architecture detection test.
577        if stde == '':
578            self.detected_cc = stdo
579        else:
580            mlog.debug('cudaGetDeviceCount() returned ' + stde)
581
582    def has_header_symbol(self, hname: str, symbol: str, prefix: str,
583                          env: 'Environment', *,
584                          extra_args: T.Union[None, T.List[str], T.Callable[[CompileCheckMode], T.List[str]]] = None,
585                          dependencies: T.Optional[T.List['Dependency']] = None) -> T.Tuple[bool, bool]:
586        if extra_args is None:
587            extra_args = []
588        fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol}
589        # Check if it's a C-like symbol
590        t = '''{prefix}
591        #include <{header}>
592        int main(void) {{
593            /* If it's not defined as a macro, try to use as a symbol */
594            #ifndef {symbol}
595                {symbol};
596            #endif
597            return 0;
598        }}'''
599        found, cached = self.compiles(t.format_map(fargs), env, extra_args=extra_args, dependencies=dependencies)
600        if found:
601            return True, cached
602        # Check if it's a class or a template
603        t = '''{prefix}
604        #include <{header}>
605        using {symbol};
606        int main(void) {{
607            return 0;
608        }}'''
609        return self.compiles(t.format_map(fargs), env, extra_args=extra_args, dependencies=dependencies)
610
611    def get_options(self) -> 'KeyedOptionDictType':
612        opts = super().get_options()
613        std_key      = OptionKey('std',      machine=self.for_machine, lang=self.language)
614        ccbindir_key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
615        opts.update({
616            std_key:      coredata.UserComboOption('C++ language standard to use with CUDA',
617                                                   ['none', 'c++03', 'c++11', 'c++14', 'c++17'], 'none'),
618            ccbindir_key: coredata.UserStringOption('CUDA non-default toolchain directory to use (-ccbin)',
619                                                    ''),
620        })
621        return opts
622
623    def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOptionDictType':
624        """
625        Convert an NVCC Option set to a host compiler's option set.
626        """
627
628        # We must strip the -std option from the host compiler option set, as NVCC has
629        # its own -std flag that may not agree with the host compiler's.
630        host_options = {key: options.get(key, opt) for key, opt in self.host_compiler.get_options().items()}
631        std_key = OptionKey('std', machine=self.for_machine, lang=self.host_compiler.language)
632        overrides = {std_key: 'none'}
633        return OptionOverrideProxy(overrides, host_options)
634
635    def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
636        args = self.get_ccbin_args(options)
637        # On Windows, the version of the C++ standard used by nvcc is dictated by
638        # the combination of CUDA version and MSVC version; the --std= is thus ignored
639        # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
640        if not is_windows():
641            key = OptionKey('std', machine=self.for_machine, lang=self.language)
642            std = options[key]
643            if std.value != 'none':
644                args.append('--std=' + std.value)
645
646        return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options)))
647
648    def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
649        args = self.get_ccbin_args(options)
650        return args + self._to_host_flags(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)), _Phase.LINKER)
651
652    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
653                        suffix: str, soversion: str,
654                        darwin_versions: T.Tuple[str, str]) -> T.List[str]:
655        return self._to_host_flags(self.host_compiler.get_soname_args(
656            env, prefix, shlib_name, suffix, soversion, darwin_versions), _Phase.LINKER)
657
658    def get_compile_only_args(self) -> T.List[str]:
659        return ['-c']
660
661    def get_no_optimization_args(self) -> T.List[str]:
662        return ['-O0']
663
664    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
665        # alternatively, consider simply redirecting this to the host compiler, which would
666        # give us more control over options like "optimize for space" (which nvcc doesn't support):
667        # return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level))
668        return cuda_optimization_args[optimization_level]
669
670    def sanitizer_compile_args(self, value: str) -> T.List[str]:
671        return self._to_host_flags(self.host_compiler.sanitizer_compile_args(value))
672
673    def sanitizer_link_args(self, value: str) -> T.List[str]:
674        return self._to_host_flags(self.host_compiler.sanitizer_link_args(value))
675
676    def get_debug_args(self, is_debug: bool) -> T.List[str]:
677        return cuda_debug_args[is_debug]
678
679    def get_werror_args(self) -> T.List[str]:
680        return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']
681
682    def get_warn_args(self, level: str) -> T.List[str]:
683        return self.warn_args[level]
684
685    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
686        # nvcc doesn't support msvc's "Edit and Continue" PDB format; "downgrade" to
687        # a regular PDB to avoid cl's warning to that effect (D9025 : overriding '/ZI' with '/Zi')
688        host_args = ['/Zi' if arg == '/ZI' else arg for arg in self.host_compiler.get_buildtype_args(buildtype)]
689        return cuda_buildtype_args[buildtype] + self._to_host_flags(host_args)
690
691    def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
692        if path == '':
693            path = '.'
694        return ['-isystem=' + path] if is_system else ['-I' + path]
695
696    def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
697        return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, pch))
698
699    def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
700        return self._to_host_flags(self.host_compiler.get_link_debugfile_args(targetfile), _Phase.LINKER)
701
702    def get_depfile_suffix(self) -> str:
703        return 'd'
704
705    def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]:
706        return self._to_host_flags(self.host_compiler.get_buildtype_linker_args(buildtype), _Phase.LINKER)
707
708    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
709                         rpath_paths: str, build_rpath: str,
710                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
711        (rpath_args, rpath_dirs_to_remove) = self.host_compiler.build_rpath_args(
712            env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
713        return (self._to_host_flags(rpath_args, _Phase.LINKER), rpath_dirs_to_remove)
714
715    def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]:
716        return args
717
718    def get_pic_args(self) -> T.List[str]:
719        return self._to_host_flags(self.host_compiler.get_pic_args())
720
721    def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str],
722                                               build_dir: str) -> T.List[str]:
723        return []
724
725    def get_output_args(self, target: str) -> T.List[str]:
726        return ['-o', target]
727
728    def get_std_exe_link_args(self) -> T.List[str]:
729        return self._to_host_flags(self.host_compiler.get_std_exe_link_args(), _Phase.LINKER)
730
731    def find_library(self, libname: str, env: 'Environment', extra_dirs: T.List[str],
732                     libtype: LibType = LibType.PREFER_SHARED) -> T.Optional[T.List[str]]:
733        return ['-l' + libname] # FIXME
734
735    def get_crt_compile_args(self, crt_val: str, buildtype: str) -> T.List[str]:
736        return self._to_host_flags(self.host_compiler.get_crt_compile_args(crt_val, buildtype))
737
738    def get_crt_link_args(self, crt_val: str, buildtype: str) -> T.List[str]:
739        # nvcc defaults to static, release version of msvc runtime and provides no
740        # native option to override it; override it with /NODEFAULTLIB
741        host_link_arg_overrides = []
742        host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
743        if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args):
744            host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib']
745        return self._to_host_flags(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype), _Phase.LINKER)
746
747    def get_target_link_args(self, target: 'BuildTarget') -> T.List[str]:
748        return self._to_host_flags(super().get_target_link_args(target), _Phase.LINKER)
749
750    def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]:
751        return self._to_host_flags(super().get_dependency_compile_args(dep))
752
753    def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]:
754        return self._to_host_flags(super().get_dependency_link_args(dep), _Phase.LINKER)
755
756    def get_ccbin_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
757        key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
758        ccbindir = options[key].value
759        if isinstance(ccbindir, str) and ccbindir != '':
760            return [self._shield_nvcc_list_arg('-ccbin='+ccbindir, False)]
761        else:
762            return []
763
764    def get_profile_generate_args(self) -> T.List[str]:
765        return ['-Xcompiler=' + x for x in self.host_compiler.get_profile_generate_args()]
766
767    def get_profile_use_args(self) -> T.List[str]:
768        return ['-Xcompiler=' + x for x in self.host_compiler.get_profile_use_args()]
769
770    def get_disable_assert_args(self) -> T.List[str]:
771        return self.host_compiler.get_disable_assert_args()
772