1 //===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HIP.h"
10 #include "AMDGPU.h"
11 #include "CommonArgs.h"
12 #include "InputInfo.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Basic/TargetID.h"
15 #include "clang/Driver/Compilation.h"
16 #include "clang/Driver/Driver.h"
17 #include "clang/Driver/DriverDiagnostic.h"
18 #include "clang/Driver/Options.h"
19 #include "llvm/Support/Alignment.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/TargetParser.h"
23 
24 using namespace clang::driver;
25 using namespace clang::driver::toolchains;
26 using namespace clang::driver::tools;
27 using namespace clang;
28 using namespace llvm::opt;
29 
30 #if defined(_WIN32) || defined(_WIN64)
31 #define NULL_FILE "nul"
32 #else
33 #define NULL_FILE "/dev/null"
34 #endif
35 
36 namespace {
37 const unsigned HIPCodeObjectAlign = 4096;
38 
addBCLib(const Driver & D,const ArgList & Args,ArgStringList & CmdArgs,ArgStringList LibraryPaths,StringRef BCName)39 static void addBCLib(const Driver &D, const ArgList &Args,
40                      ArgStringList &CmdArgs, ArgStringList LibraryPaths,
41                      StringRef BCName) {
42   StringRef FullName;
43   for (std::string LibraryPath : LibraryPaths) {
44     SmallString<128> Path(LibraryPath);
45     llvm::sys::path::append(Path, BCName);
46     FullName = Path;
47     if (llvm::sys::fs::exists(FullName)) {
48       CmdArgs.push_back("-mlink-builtin-bitcode");
49       CmdArgs.push_back(Args.MakeArgString(FullName));
50       return;
51     }
52   }
53   D.Diag(diag::err_drv_no_such_file) << BCName;
54 }
55 } // namespace
56 
constructLldCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const InputInfo & Output,const llvm::opt::ArgList & Args) const57 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
58                                           const InputInfoList &Inputs,
59                                           const InputInfo &Output,
60                                           const llvm::opt::ArgList &Args) const {
61   // Construct lld command.
62   // The output from ld.lld is an HSA code object file.
63   ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared",
64                         "-plugin-opt=-amdgpu-internalize-symbols"};
65 
66   auto &TC = getToolChain();
67   auto &D = TC.getDriver();
68   assert(!Inputs.empty() && "Must have at least one input.");
69   addLTOOptions(TC, Args, LldArgs, Output, Inputs[0],
70                 D.getLTOMode() == LTOK_Thin);
71 
72   // Extract all the -m options
73   std::vector<llvm::StringRef> Features;
74   amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features);
75 
76   // Add features to mattr such as cumode
77   std::string MAttrString = "-plugin-opt=-mattr=";
78   for (auto OneFeature : unifyTargetFeatures(Features)) {
79     MAttrString.append(Args.MakeArgString(OneFeature));
80     if (OneFeature != Features.back())
81       MAttrString.append(",");
82   }
83   if (!Features.empty())
84     LldArgs.push_back(Args.MakeArgString(MAttrString));
85 
86   for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
87     LldArgs.push_back(
88         Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0)));
89   }
90 
91   if (C.getDriver().isSaveTempsEnabled())
92     LldArgs.push_back("-save-temps");
93 
94   addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
95 
96   LldArgs.append({"-o", Output.getFilename()});
97   for (auto Input : Inputs)
98     LldArgs.push_back(Input.getFilename());
99   const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
100   C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
101                                          Lld, LldArgs, Inputs, Output));
102 }
103 
104 // Construct a clang-offload-bundler command to bundle code objects for
105 // different GPU's into a HIP fat binary.
constructHIPFatbinCommand(Compilation & C,const JobAction & JA,StringRef OutputFileName,const InputInfoList & Inputs,const llvm::opt::ArgList & Args,const Tool & T)106 void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
107                   StringRef OutputFileName, const InputInfoList &Inputs,
108                   const llvm::opt::ArgList &Args, const Tool& T) {
109   // Construct clang-offload-bundler command to bundle object files for
110   // for different GPU archs.
111   ArgStringList BundlerArgs;
112   BundlerArgs.push_back(Args.MakeArgString("-type=o"));
113   BundlerArgs.push_back(
114       Args.MakeArgString("-bundle-align=" + Twine(HIPCodeObjectAlign)));
115 
116   // ToDo: Remove the dummy host binary entry which is required by
117   // clang-offload-bundler.
118   std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
119   std::string BundlerInputArg = "-inputs=" NULL_FILE;
120 
121   // TODO: Change the bundle ID as requested by HIP runtime.
122   // For code object version 2 and 3, the offload kind in bundle ID is 'hip'
123   // for backward compatibility. For code object version 4 and greater, the
124   // offload kind in bundle ID is 'hipv4'.
125   std::string OffloadKind = "hip";
126   for (const auto &II : Inputs) {
127     const auto* A = II.getAction();
128     BundlerTargetArg = BundlerTargetArg + "," + OffloadKind +
129                        "-amdgcn-amd-amdhsa--" +
130                        StringRef(A->getOffloadingArch()).str();
131     BundlerInputArg = BundlerInputArg + "," + II.getFilename();
132   }
133   BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
134   BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
135 
136   std::string Output = std::string(OutputFileName);
137   auto BundlerOutputArg =
138       Args.MakeArgString(std::string("-outputs=").append(Output));
139   BundlerArgs.push_back(BundlerOutputArg);
140 
141   const char *Bundler = Args.MakeArgString(
142       T.getToolChain().GetProgramPath("clang-offload-bundler"));
143   C.addCommand(std::make_unique<Command>(
144       JA, T, ResponseFileSupport::None(), Bundler, BundlerArgs, Inputs,
145       InputInfo(&JA, Args.MakeArgString(Output))));
146 }
147 
148 /// Add Generated HIP Object File which has device images embedded into the
149 /// host to the argument list for linking. Using MC directives, embed the
150 /// device code and also define symbols required by the code generation so that
151 /// the image can be retrieved at runtime.
constructGenerateObjFileFromHIPFatBinary(Compilation & C,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const JobAction & JA) const152 void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
153     Compilation &C, const InputInfo &Output,
154     const InputInfoList &Inputs, const ArgList &Args,
155     const JobAction &JA) const {
156   const ToolChain &TC = getToolChain();
157   std::string Name =
158       std::string(llvm::sys::path::stem(Output.getFilename()));
159 
160   // Create Temp Object File Generator,
161   // Offload Bundled file and Bundled Object file.
162   // Keep them if save-temps is enabled.
163   const char *McinFile;
164   const char *BundleFile;
165   if (C.getDriver().isSaveTempsEnabled()) {
166     McinFile = C.getArgs().MakeArgString(Name + ".mcin");
167     BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
168   } else {
169     auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin");
170     McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin));
171     auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb");
172     BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb));
173   }
174   constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this);
175 
176   // Create a buffer to write the contents of the temp obj generator.
177   std::string ObjBuffer;
178   llvm::raw_string_ostream ObjStream(ObjBuffer);
179 
180   // Add MC directives to embed target binaries. We ensure that each
181   // section and image is 16-byte aligned. This is not mandatory, but
182   // increases the likelihood of data to be aligned with a cache block
183   // in several main host machines.
184   ObjStream << "#       HIP Object Generator\n";
185   ObjStream << "# *** Automatically generated by Clang ***\n";
186   ObjStream << "  .type __hip_fatbin,@object\n";
187   ObjStream << "  .section .hip_fatbin,\"a\",@progbits\n";
188   ObjStream << "  .globl __hip_fatbin\n";
189   ObjStream << "  .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign))
190             << "\n";
191   ObjStream << "__hip_fatbin:\n";
192   ObjStream << "  .incbin \"" << BundleFile << "\"\n";
193   ObjStream.flush();
194 
195   // Dump the contents of the temp object file gen if the user requested that.
196   // We support this option to enable testing of behavior with -###.
197   if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
198     llvm::errs() << ObjBuffer;
199 
200   // Open script file and write the contents.
201   std::error_code EC;
202   llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None);
203 
204   if (EC) {
205     C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
206     return;
207   }
208 
209   Objf << ObjBuffer;
210 
211   ArgStringList McArgs{"-o",      Output.getFilename(),
212                        McinFile,  "--filetype=obj"};
213   const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
214   C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
215                                          Mc, McArgs, Inputs, Output));
216 }
217 
218 // For amdgcn the inputs of the linker job are device bitcode and output is
219 // object file. It calls llvm-link, opt, llc, then lld steps.
ConstructJob(Compilation & C,const JobAction & JA,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const char * LinkingOutput) const220 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
221                                    const InputInfo &Output,
222                                    const InputInfoList &Inputs,
223                                    const ArgList &Args,
224                                    const char *LinkingOutput) const {
225   if (Inputs.size() > 0 &&
226       Inputs[0].getType() == types::TY_Image &&
227       JA.getType() == types::TY_Object)
228     return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA);
229 
230   if (JA.getType() == types::TY_HIP_FATBIN)
231     return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
232 
233   return constructLldCommand(C, JA, Inputs, Output, Args);
234 }
235 
HIPToolChain(const Driver & D,const llvm::Triple & Triple,const ToolChain & HostTC,const ArgList & Args)236 HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
237                              const ToolChain &HostTC, const ArgList &Args)
238     : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
239   // Lookup binaries into the driver directory, this is used to
240   // discover the clang-offload-bundler executable.
241   getProgramPaths().push_back(getDriver().Dir);
242 }
243 
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const244 void HIPToolChain::addClangTargetOptions(
245     const llvm::opt::ArgList &DriverArgs,
246     llvm::opt::ArgStringList &CC1Args,
247     Action::OffloadKind DeviceOffloadingKind) const {
248   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
249 
250   StringRef GpuArch = getGPUArch(DriverArgs);
251   assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
252   (void) GpuArch;
253   assert(DeviceOffloadingKind == Action::OFK_HIP &&
254          "Only HIP offloading kinds are supported for GPUs.");
255   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
256   const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
257 
258   CC1Args.push_back("-fcuda-is-device");
259 
260   if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
261                          options::OPT_fno_cuda_approx_transcendentals, false))
262     CC1Args.push_back("-fcuda-approx-transcendentals");
263 
264   if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
265                          false))
266     CC1Args.push_back("-fgpu-rdc");
267   else
268     CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
269 
270   StringRef MaxThreadsPerBlock =
271       DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
272   if (!MaxThreadsPerBlock.empty()) {
273     std::string ArgStr =
274         std::string("--gpu-max-threads-per-block=") + MaxThreadsPerBlock.str();
275     CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr));
276   }
277 
278   CC1Args.push_back("-fcuda-allow-variadic-functions");
279 
280   // Default to "hidden" visibility, as object level linking will not be
281   // supported for the foreseeable future.
282   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
283                          options::OPT_fvisibility_ms_compat)) {
284     CC1Args.append({"-fvisibility", "hidden"});
285     CC1Args.push_back("-fapply-global-visibility-to-externs");
286   }
287 
288   if (DriverArgs.hasArg(options::OPT_nogpulib))
289     return;
290   ArgStringList LibraryPaths;
291 
292   // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
293   for (auto Path : RocmInstallation.getRocmDeviceLibPathArg())
294     LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
295 
296   addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH");
297 
298   // Maintain compatability with --hip-device-lib.
299   auto BCLibs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ);
300   if (!BCLibs.empty()) {
301     for (auto Lib : BCLibs)
302       addBCLib(getDriver(), DriverArgs, CC1Args, LibraryPaths, Lib);
303   } else {
304     if (!RocmInstallation.hasDeviceLibrary()) {
305       getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
306       return;
307     }
308 
309     std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
310     if (LibDeviceFile.empty()) {
311       getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
312       return;
313     }
314 
315     // If --hip-device-lib is not set, add the default bitcode libraries.
316     // TODO: There are way too many flags that change this. Do we need to check
317     // them all?
318     bool DAZ = DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
319                                   options::OPT_fno_cuda_flush_denormals_to_zero,
320                                   getDefaultDenormsAreZeroForTarget(Kind));
321     // TODO: Check standard C++ flags?
322     bool FiniteOnly = false;
323     bool UnsafeMathOpt = false;
324     bool FastRelaxedMath = false;
325     bool CorrectSqrt = true;
326     bool Wave64 = isWave64(DriverArgs, Kind);
327 
328     // Add the HIP specific bitcode library.
329     CC1Args.push_back("-mlink-builtin-bitcode");
330     CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getHIPPath()));
331 
332     // Add the generic set of libraries.
333     RocmInstallation.addCommonBitcodeLibCC1Args(
334       DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
335       UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
336 
337     // Add instrument lib.
338     auto InstLib =
339         DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ);
340     if (InstLib.empty())
341       return;
342     if (llvm::sys::fs::exists(InstLib)) {
343       CC1Args.push_back("-mlink-builtin-bitcode");
344       CC1Args.push_back(DriverArgs.MakeArgString(InstLib));
345     } else
346       getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
347   }
348 }
349 
350 llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList & Args,StringRef BoundArch,Action::OffloadKind DeviceOffloadKind) const351 HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
352                              StringRef BoundArch,
353                              Action::OffloadKind DeviceOffloadKind) const {
354   DerivedArgList *DAL =
355       HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
356   if (!DAL)
357     DAL = new DerivedArgList(Args.getBaseArgs());
358 
359   const OptTable &Opts = getDriver().getOpts();
360 
361   for (Arg *A : Args) {
362     if (!shouldSkipArgument(A))
363       DAL->append(A);
364   }
365 
366   if (!BoundArch.empty()) {
367     DAL->eraseArg(options::OPT_mcpu_EQ);
368     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
369     checkTargetID(*DAL);
370   }
371 
372   return DAL;
373 }
374 
buildLinker() const375 Tool *HIPToolChain::buildLinker() const {
376   assert(getTriple().getArch() == llvm::Triple::amdgcn);
377   return new tools::AMDGCN::Linker(*this);
378 }
379 
addClangWarningOptions(ArgStringList & CC1Args) const380 void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
381   HostTC.addClangWarningOptions(CC1Args);
382 }
383 
384 ToolChain::CXXStdlibType
GetCXXStdlibType(const ArgList & Args) const385 HIPToolChain::GetCXXStdlibType(const ArgList &Args) const {
386   return HostTC.GetCXXStdlibType(Args);
387 }
388 
AddClangSystemIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const389 void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
390                                               ArgStringList &CC1Args) const {
391   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
392 }
393 
AddClangCXXStdlibIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const394 void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
395                                                  ArgStringList &CC1Args) const {
396   HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
397 }
398 
AddIAMCUIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const399 void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
400                                         ArgStringList &CC1Args) const {
401   HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
402 }
403 
AddHIPIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const404 void HIPToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
405                                      ArgStringList &CC1Args) const {
406   RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
407 }
408 
getSupportedSanitizers() const409 SanitizerMask HIPToolChain::getSupportedSanitizers() const {
410   // The HIPToolChain only supports sanitizers in the sense that it allows
411   // sanitizer arguments on the command line if they are supported by the host
412   // toolchain. The HIPToolChain will actually ignore any command line
413   // arguments for any of these "supported" sanitizers. That means that no
414   // sanitization of device code is actually supported at this time.
415   //
416   // This behavior is necessary because the host and device toolchains
417   // invocations often share the command line, so the device toolchain must
418   // tolerate flags meant only for the host toolchain.
419   return HostTC.getSupportedSanitizers();
420 }
421 
computeMSVCVersion(const Driver * D,const ArgList & Args) const422 VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
423                                                const ArgList &Args) const {
424   return HostTC.computeMSVCVersion(D, Args);
425 }
426