1# Copyright 2012-2017 The Meson development team
2
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6
7#     http://www.apache.org/licenses/LICENSE-2.0
8
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import os.path
16import typing as T
17from functools import partial
18
19from .. import coredata
20from .. import mlog
21from ..mesonlib import EnvironmentException, MachineChoice, Popen_safe, OptionOverrideProxy, is_windows, LibType
22from .compilers import (Compiler, cuda_buildtype_args, cuda_optimization_args,
23                        cuda_debug_args)
24
25if T.TYPE_CHECKING:
26    from ..environment import Environment  # noqa: F401
27    from ..envconfig import MachineInfo
28
29
30class CudaCompiler(Compiler):
31
    # NOTE(review): not referenced anywhere in the visible part of this class;
    # presumably read by the base class or a backend when forwarding linker
    # arguments -- confirm.
    LINKER_PREFIX = '-Xlinker='
    language = 'cuda'

    # Per-phase flag prefixes that _to_host_flag() returns unchanged instead of
    # wrapping them in '-X<phase>='. Matching is done on the first two
    # characters of the flag.
    _universal_flags = {'compiler': ['-I', '-D', '-U', '-E'], 'linker': ['-l', '-L']}
36
37    def __init__(self, exelist, version, for_machine: MachineChoice,
38                 is_cross, exe_wrapper, host_compiler, info: 'MachineInfo', **kwargs):
39        super().__init__(exelist, version, for_machine, info, **kwargs)
40        self.is_cross = is_cross
41        self.exe_wrapper = exe_wrapper
42        self.host_compiler = host_compiler
43        self.base_options = host_compiler.base_options
44        self.id = 'nvcc'
45        self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()}
46
47    @classmethod
48    def _to_host_flags(cls, flags, phase='compiler'):
49        return list(map(partial(cls._to_host_flag, phase=phase), flags))
50
51    @classmethod
52    def _to_host_flag(cls, flag, phase):
53        if not flag[0] in ['-', '/'] or flag[:2] in cls._universal_flags[phase]:
54            return flag
55
56        return '-X{}={}'.format(phase, flag)
57
58    def needs_static_linker(self):
59        return False
60
61    def get_always_args(self):
62        return []
63
64    def get_no_stdinc_args(self):
65        return []
66
67    def thread_link_flags(self, environment):
68        return self._to_host_flags(self.host_compiler.thread_link_flags(environment))
69
    def sanity_check(self, work_dir, environment):
        """Compile (and, when possible, run) a tiny CUDA program.

        The program is written to ``work_dir``, compiled with ``self.exelist``
        and, unless this is a cross build without an exe wrapper, executed.
        When it runs, it prints one "major.minor" line per CUDA device to
        stdout, or the numeric cudaGetDeviceCount() error to stderr.

        Side effects: creates the source file (and the compiler's output) in
        ``work_dir`` and sets ``self.detected_cc`` -- to the program's stdout
        when its stderr was empty, otherwise it stays ''.

        ``environment`` is not used by this method.

        Raises EnvironmentException when the compile step or the run step
        exits with a non-zero return code.
        """
        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))

        sname = 'sanitycheckcuda.cu'
        code = r'''
        #include <cuda_runtime.h>
        #include <stdio.h>

        __global__ void kernel (void) {}

        int main(void){
            struct cudaDeviceProp prop;
            int count, i;
            cudaError_t ret = cudaGetDeviceCount(&count);
            if(ret != cudaSuccess){
                fprintf(stderr, "%d\n", (int)ret);
            }else{
                for(i=0;i<count;i++){
                    if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
                        fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
                    }
                }
            }
            fflush(stderr);
            fflush(stdout);
            return 0;
        }
        '''
        # Output name is derived from the source name; cross builds get a
        # '_cross' suffix. The '.exe' extension is appended on every platform.
        binname = sname.rsplit('.', 1)[0]
        binname += '_cross' if self.is_cross else ''
        source_name = os.path.join(work_dir, sname)
        binary_name = os.path.join(work_dir, binname + '.exe')
        with open(source_name, 'w') as ofile:
            ofile.write(code)

        # The Sanity Test for CUDA language will serve as both a sanity test
        # and a native-build GPU architecture detection test, useful later.
        #
        # For this second purpose, NVCC has very handy flags, --run and
        # --run-args, that allow one to run an application with the
        # environment set up properly. Of course, this only works for native
        # builds; For cross builds we must still use the exe_wrapper (if any).
        self.detected_cc = ''
        flags = ['-w', '-cudart', 'static', source_name]
        if self.is_cross and self.exe_wrapper is None:
            # Linking cross built apps is painful. You can't really
            # tell if you should use -nostdlib or not and for example
            # on OSX the compiler binary is the same but you need
            # a ton of compiler flags to differentiate between
            # arm and x86_64. So just compile.
            flags += self.get_compile_only_args()
        flags += self.get_output_args(binary_name)

        # Compile sanity check
        cmdlist = self.exelist + flags
        mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
        mlog.debug('Sanity check compile stdout: ')
        mlog.debug(stdo)
        mlog.debug('-----\nSanity check compile stderr:')
        mlog.debug(stde)
        mlog.debug('-----')
        if pc.returncode != 0:
            raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))

        # Run sanity check (if possible)
        if self.is_cross:
            if self.exe_wrapper is None:
                # Nothing can execute the cross-built object; compile-only
                # success is all that is checked.
                return
            else:
                cmdlist = self.exe_wrapper + [binary_name]
        else:
            # NOTE(review): the binary path is wrapped in literal double quotes
            # before being handed to 'nvcc --run' -- confirm nvcc expects that.
            cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
        mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
        pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
        mlog.debug('Sanity check run stdout: ')
        mlog.debug(stdo)
        mlog.debug('-----\nSanity check run stderr:')
        mlog.debug(stde)
        mlog.debug('-----')
        # NOTE(review): Popen_safe presumably already waited for the process;
        # confirm whether this extra wait() is needed.
        pe.wait()
        if pe.returncode != 0:
            raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))

        # Interpret the result of the sanity test.
        # As mentioned above, it is not only a sanity test but also a GPU
        # architecture detection test.
        if stde == '':
            self.detected_cc = stdo
        else:
            mlog.debug('cudaGetDeviceCount() returned ' + stde)
162
163    def has_header_symbol(self, hname, symbol, prefix, env, extra_args=None, dependencies=None):
164        result, cached = super().has_header_symbol(hname, symbol, prefix, env, extra_args, dependencies)
165        if result:
166            return True, cached
167        if extra_args is None:
168            extra_args = []
169        fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol}
170        t = '''{prefix}
171        #include <{header}>
172        using {symbol};
173        int main(void) {{ return 0; }}'''
174        return self.compiles(t.format(**fargs), env, extra_args, dependencies)
175
176    def get_options(self):
177        opts = super().get_options()
178        opts.update({'cuda_std': coredata.UserComboOption('C++ language standard to use',
179                                                          ['none', 'c++03', 'c++11', 'c++14'],
180                                                          'none')})
181        return opts
182
183    def _to_host_compiler_options(self, options):
184        overrides = {name: opt.value for name, opt in options.copy().items()}
185        return OptionOverrideProxy(overrides, self.host_compiler.get_options())
186
187    def get_option_compile_args(self, options):
188        args = []
189        # On Windows, the version of the C++ standard used by nvcc is dictated by
190        # the combination of CUDA version and MSVC version; the --std= is thus ignored
191        # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
192        if not is_windows():
193            std = options['cuda_std']
194            if std.value != 'none':
195                args.append('--std=' + std.value)
196
197        return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options)))
198
199    @classmethod
200    def _cook_link_args(cls, args: T.List[str]) -> T.List[str]:
201        # Prepare link args for nvcc
202        cooked = []  # type: T.List[str]
203        for arg in args:
204            if arg.startswith('-Wl,'): # strip GNU-style -Wl prefix
205                arg = arg.replace('-Wl,', '', 1)
206            arg = arg.replace(' ', '\\') # espace whitespace
207            cooked.append(arg)
208        return cls._to_host_flags(cooked, 'linker')
209
210    def get_option_link_args(self, options):
211        return self._cook_link_args(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)))
212
213    def name_string(self):
214        return ' '.join(self.exelist)
215
216    def get_soname_args(self, *args):
217        return self._cook_link_args(self.host_compiler.get_soname_args(*args))
218
219    def get_dependency_gen_args(self, outtarget, outfile):
220        return []
221
222    def get_compile_only_args(self):
223        return ['-c']
224
225    def get_no_optimization_args(self):
226        return ['-O0']
227
228    def get_optimization_args(self, optimization_level):
229        # alternatively, consider simply redirecting this to the host compiler, which would
230        # give us more control over options like "optimize for space" (which nvcc doesn't support):
231        # return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level))
232        return cuda_optimization_args[optimization_level]
233
234    def get_debug_args(self, is_debug):
235        return cuda_debug_args[is_debug]
236
237    def get_werror_args(self):
238        return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']
239
240    def get_warn_args(self, level):
241        return self.warn_args[level]
242
243    def get_buildtype_args(self, buildtype):
244        # nvcc doesn't support msvc's "Edit and Continue" PDB format; "downgrade" to
245        # a regular PDB to avoid cl's warning to that effect (D9025 : overriding '/ZI' with '/Zi')
246        host_args = ['/Zi' if arg == '/ZI' else arg for arg in self.host_compiler.get_buildtype_args(buildtype)]
247        return cuda_buildtype_args[buildtype] + self._to_host_flags(host_args)
248
249    def get_include_args(self, path, is_system):
250        if path == '':
251            path = '.'
252        return ['-I' + path]
253
254    def get_compile_debugfile_args(self, rel_obj, **kwargs):
255        return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, **kwargs))
256
257    def get_link_debugfile_args(self, targetfile):
258        return self._cook_link_args(self.host_compiler.get_link_debugfile_args(targetfile))
259
260    def depfile_for_object(self, objfile):
261        return objfile + '.' + self.get_depfile_suffix()
262
263    def get_depfile_suffix(self):
264        return 'd'
265
266    def get_buildtype_linker_args(self, buildtype):
267        return self._cook_link_args(self.host_compiler.get_buildtype_linker_args(buildtype))
268
269    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
270                         rpath_paths: str, build_rpath: str,
271                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
272        (rpath_args, rpath_dirs_to_remove) = self.host_compiler.build_rpath_args(
273            env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
274        return (self._cook_link_args(rpath_args), rpath_dirs_to_remove)
275
276    def linker_to_compiler_args(self, args):
277        return args
278
279    def get_pic_args(self):
280        return self._to_host_flags(self.host_compiler.get_pic_args())
281
282    def compute_parameters_with_absolute_paths(self, parameter_list, build_dir):
283        return []
284
285    def get_output_args(self, target: str) -> T.List[str]:
286        return ['-o', target]
287
288    def get_std_exe_link_args(self) -> T.List[str]:
289        return self._cook_link_args(self.host_compiler.get_std_exe_link_args())
290
291    def find_library(self, libname, env, extra_dirs, libtype: LibType = LibType.PREFER_SHARED):
292        return ['-l' + libname] # FIXME
293
294    def get_crt_compile_args(self, crt_val, buildtype):
295        return self._to_host_flags(self.host_compiler.get_crt_compile_args(crt_val, buildtype))
296
297    def get_crt_link_args(self, crt_val, buildtype):
298        # nvcc defaults to static, release version of msvc runtime and provides no
299        # native option to override it; override it with /NODEFAULTLIB
300        host_link_arg_overrides = []
301        host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
302        if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args):
303            host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib']
304        return self._cook_link_args(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype))
305
306    def get_target_link_args(self, target):
307        return self._cook_link_args(super().get_target_link_args(target))
308
309    def get_dependency_compile_args(self, dep):
310        return self._to_host_flags(super().get_dependency_compile_args(dep))
311
312    def get_dependency_link_args(self, dep):
313        return self._cook_link_args(super().get_dependency_link_args(dep))
314