1 //===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "HIP.h"
10 #include "AMDGPU.h"
11 #include "CommonArgs.h"
12 #include "clang/Basic/Cuda.h"
13 #include "clang/Basic/TargetID.h"
14 #include "clang/Driver/Compilation.h"
15 #include "clang/Driver/Driver.h"
16 #include "clang/Driver/DriverDiagnostic.h"
17 #include "clang/Driver/InputInfo.h"
18 #include "clang/Driver/Options.h"
19 #include "llvm/Support/Alignment.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/TargetParser.h"
23
24 using namespace clang::driver;
25 using namespace clang::driver::toolchains;
26 using namespace clang::driver::tools;
27 using namespace clang;
28 using namespace llvm::opt;
29
// Name of the platform's null device. It is spliced into the bundler's
// "-inputs=" argument below as the placeholder host input.
#if !defined(_WIN32) && !defined(_WIN64)
#define NULL_FILE "/dev/null"
#else
#define NULL_FILE "nul"
#endif
35
namespace {
/// Alignment, in bytes, used for HIP code objects in this file: it is passed
/// to clang-offload-bundler as -bundle-align and determines the .p2align of
/// the embedded __hip_fatbin symbol.
constexpr unsigned HIPCodeObjectAlign = 4096;
} // namespace
39
constructLldCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const InputInfo & Output,const llvm::opt::ArgList & Args) const40 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
41 const InputInfoList &Inputs,
42 const InputInfo &Output,
43 const llvm::opt::ArgList &Args) const {
44 // Construct lld command.
45 // The output from ld.lld is an HSA code object file.
46 ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared",
47 "-plugin-opt=-amdgpu-internalize-symbols"};
48
49 auto &TC = getToolChain();
50 auto &D = TC.getDriver();
51 assert(!Inputs.empty() && "Must have at least one input.");
52 bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin;
53 addLTOOptions(TC, Args, LldArgs, Output, Inputs[0], IsThinLTO);
54
55 // Extract all the -m options
56 std::vector<llvm::StringRef> Features;
57 amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features);
58
59 // Add features to mattr such as cumode
60 std::string MAttrString = "-plugin-opt=-mattr=";
61 for (auto OneFeature : unifyTargetFeatures(Features)) {
62 MAttrString.append(Args.MakeArgString(OneFeature));
63 if (OneFeature != Features.back())
64 MAttrString.append(",");
65 }
66 if (!Features.empty())
67 LldArgs.push_back(Args.MakeArgString(MAttrString));
68
69 // ToDo: Remove this option after AMDGPU backend supports ISA-level linking.
70 // Since AMDGPU backend currently does not support ISA-level linking, all
71 // called functions need to be imported.
72 if (IsThinLTO)
73 LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all"));
74
75 for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
76 LldArgs.push_back(
77 Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0)));
78 }
79
80 if (C.getDriver().isSaveTempsEnabled())
81 LldArgs.push_back("-save-temps");
82
83 addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
84
85 LldArgs.append({"-o", Output.getFilename()});
86 for (auto Input : Inputs)
87 LldArgs.push_back(Input.getFilename());
88
89 if (Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
90 false))
91 llvm::for_each(TC.getHIPDeviceLibs(Args), [&](StringRef BCFile) {
92 LldArgs.push_back(Args.MakeArgString(BCFile));
93 });
94
95 const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
96 C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
97 Lld, LldArgs, Inputs, Output));
98 }
99
100 // Construct a clang-offload-bundler command to bundle code objects for
101 // different GPU's into a HIP fat binary.
constructHIPFatbinCommand(Compilation & C,const JobAction & JA,StringRef OutputFileName,const InputInfoList & Inputs,const llvm::opt::ArgList & Args,const Tool & T)102 void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
103 StringRef OutputFileName, const InputInfoList &Inputs,
104 const llvm::opt::ArgList &Args, const Tool& T) {
105 // Construct clang-offload-bundler command to bundle object files for
106 // for different GPU archs.
107 ArgStringList BundlerArgs;
108 BundlerArgs.push_back(Args.MakeArgString("-type=o"));
109 BundlerArgs.push_back(
110 Args.MakeArgString("-bundle-align=" + Twine(HIPCodeObjectAlign)));
111
112 // ToDo: Remove the dummy host binary entry which is required by
113 // clang-offload-bundler.
114 std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
115 std::string BundlerInputArg = "-inputs=" NULL_FILE;
116
117 // For code object version 2 and 3, the offload kind in bundle ID is 'hip'
118 // for backward compatibility. For code object version 4 and greater, the
119 // offload kind in bundle ID is 'hipv4'.
120 std::string OffloadKind = "hip";
121 if (getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4)
122 OffloadKind = OffloadKind + "v4";
123 for (const auto &II : Inputs) {
124 const auto* A = II.getAction();
125 BundlerTargetArg = BundlerTargetArg + "," + OffloadKind +
126 "-amdgcn-amd-amdhsa--" +
127 StringRef(A->getOffloadingArch()).str();
128 BundlerInputArg = BundlerInputArg + "," + II.getFilename();
129 }
130 BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
131 BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
132
133 std::string Output = std::string(OutputFileName);
134 auto BundlerOutputArg =
135 Args.MakeArgString(std::string("-outputs=").append(Output));
136 BundlerArgs.push_back(BundlerOutputArg);
137
138 const char *Bundler = Args.MakeArgString(
139 T.getToolChain().GetProgramPath("clang-offload-bundler"));
140 C.addCommand(std::make_unique<Command>(
141 JA, T, ResponseFileSupport::None(), Bundler, BundlerArgs, Inputs,
142 InputInfo(&JA, Args.MakeArgString(Output))));
143 }
144
145 /// Add Generated HIP Object File which has device images embedded into the
146 /// host to the argument list for linking. Using MC directives, embed the
147 /// device code and also define symbols required by the code generation so that
148 /// the image can be retrieved at runtime.
constructGenerateObjFileFromHIPFatBinary(Compilation & C,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const JobAction & JA) const149 void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
150 Compilation &C, const InputInfo &Output,
151 const InputInfoList &Inputs, const ArgList &Args,
152 const JobAction &JA) const {
153 const ToolChain &TC = getToolChain();
154 std::string Name =
155 std::string(llvm::sys::path::stem(Output.getFilename()));
156
157 // Create Temp Object File Generator,
158 // Offload Bundled file and Bundled Object file.
159 // Keep them if save-temps is enabled.
160 const char *McinFile;
161 const char *BundleFile;
162 if (C.getDriver().isSaveTempsEnabled()) {
163 McinFile = C.getArgs().MakeArgString(Name + ".mcin");
164 BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
165 } else {
166 auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin");
167 McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin));
168 auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb");
169 BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb));
170 }
171 constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this);
172
173 // Create a buffer to write the contents of the temp obj generator.
174 std::string ObjBuffer;
175 llvm::raw_string_ostream ObjStream(ObjBuffer);
176
177 // Add MC directives to embed target binaries. We ensure that each
178 // section and image is 16-byte aligned. This is not mandatory, but
179 // increases the likelihood of data to be aligned with a cache block
180 // in several main host machines.
181 ObjStream << "# HIP Object Generator\n";
182 ObjStream << "# *** Automatically generated by Clang ***\n";
183 ObjStream << " .protected __hip_fatbin\n";
184 ObjStream << " .type __hip_fatbin,@object\n";
185 ObjStream << " .section .hip_fatbin,\"a\",@progbits\n";
186 ObjStream << " .globl __hip_fatbin\n";
187 ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign))
188 << "\n";
189 ObjStream << "__hip_fatbin:\n";
190 ObjStream << " .incbin \"" << BundleFile << "\"\n";
191 ObjStream.flush();
192
193 // Dump the contents of the temp object file gen if the user requested that.
194 // We support this option to enable testing of behavior with -###.
195 if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
196 llvm::errs() << ObjBuffer;
197
198 // Open script file and write the contents.
199 std::error_code EC;
200 llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None);
201
202 if (EC) {
203 C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
204 return;
205 }
206
207 Objf << ObjBuffer;
208
209 ArgStringList McArgs{"-o", Output.getFilename(),
210 McinFile, "--filetype=obj"};
211 const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
212 C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
213 Mc, McArgs, Inputs, Output));
214 }
215
216 // For amdgcn the inputs of the linker job are device bitcode and output is
217 // object file. It calls llvm-link, opt, llc, then lld steps.
ConstructJob(Compilation & C,const JobAction & JA,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const char * LinkingOutput) const218 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
219 const InputInfo &Output,
220 const InputInfoList &Inputs,
221 const ArgList &Args,
222 const char *LinkingOutput) const {
223 if (Inputs.size() > 0 &&
224 Inputs[0].getType() == types::TY_Image &&
225 JA.getType() == types::TY_Object)
226 return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA);
227
228 if (JA.getType() == types::TY_HIP_FATBIN)
229 return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
230
231 return constructLldCommand(C, JA, Inputs, Output, Args);
232 }
233
HIPToolChain(const Driver & D,const llvm::Triple & Triple,const ToolChain & HostTC,const ArgList & Args)234 HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
235 const ToolChain &HostTC, const ArgList &Args)
236 : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
237 // Lookup binaries into the driver directory, this is used to
238 // discover the clang-offload-bundler executable.
239 getProgramPaths().push_back(getDriver().Dir);
240 }
241
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const242 void HIPToolChain::addClangTargetOptions(
243 const llvm::opt::ArgList &DriverArgs,
244 llvm::opt::ArgStringList &CC1Args,
245 Action::OffloadKind DeviceOffloadingKind) const {
246 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
247
248 assert(DeviceOffloadingKind == Action::OFK_HIP &&
249 "Only HIP offloading kinds are supported for GPUs.");
250
251 CC1Args.push_back("-fcuda-is-device");
252
253 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
254 options::OPT_fno_cuda_approx_transcendentals, false))
255 CC1Args.push_back("-fcuda-approx-transcendentals");
256
257 if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
258 false))
259 CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
260
261 StringRef MaxThreadsPerBlock =
262 DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
263 if (!MaxThreadsPerBlock.empty()) {
264 std::string ArgStr =
265 std::string("--gpu-max-threads-per-block=") + MaxThreadsPerBlock.str();
266 CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr));
267 }
268
269 CC1Args.push_back("-fcuda-allow-variadic-functions");
270
271 // Default to "hidden" visibility, as object level linking will not be
272 // supported for the foreseeable future.
273 if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
274 options::OPT_fvisibility_ms_compat)) {
275 CC1Args.append({"-fvisibility", "hidden"});
276 CC1Args.push_back("-fapply-global-visibility-to-externs");
277 }
278
279 llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](StringRef BCFile) {
280 CC1Args.push_back("-mlink-builtin-bitcode");
281 CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
282 });
283 }
284
285 llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList & Args,StringRef BoundArch,Action::OffloadKind DeviceOffloadKind) const286 HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
287 StringRef BoundArch,
288 Action::OffloadKind DeviceOffloadKind) const {
289 DerivedArgList *DAL =
290 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
291 if (!DAL)
292 DAL = new DerivedArgList(Args.getBaseArgs());
293
294 const OptTable &Opts = getDriver().getOpts();
295
296 for (Arg *A : Args) {
297 if (!shouldSkipArgument(A))
298 DAL->append(A);
299 }
300
301 if (!BoundArch.empty()) {
302 DAL->eraseArg(options::OPT_mcpu_EQ);
303 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
304 checkTargetID(*DAL);
305 }
306
307 return DAL;
308 }
309
buildLinker() const310 Tool *HIPToolChain::buildLinker() const {
311 assert(getTriple().getArch() == llvm::Triple::amdgcn);
312 return new tools::AMDGCN::Linker(*this);
313 }
314
addClangWarningOptions(ArgStringList & CC1Args) const315 void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
316 HostTC.addClangWarningOptions(CC1Args);
317 }
318
319 ToolChain::CXXStdlibType
GetCXXStdlibType(const ArgList & Args) const320 HIPToolChain::GetCXXStdlibType(const ArgList &Args) const {
321 return HostTC.GetCXXStdlibType(Args);
322 }
323
AddClangSystemIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const324 void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
325 ArgStringList &CC1Args) const {
326 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
327 }
328
AddClangCXXStdlibIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const329 void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
330 ArgStringList &CC1Args) const {
331 HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
332 }
333
AddIAMCUIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const334 void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
335 ArgStringList &CC1Args) const {
336 HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
337 }
338
AddHIPIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const339 void HIPToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
340 ArgStringList &CC1Args) const {
341 RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
342 }
343
getSupportedSanitizers() const344 SanitizerMask HIPToolChain::getSupportedSanitizers() const {
345 // The HIPToolChain only supports sanitizers in the sense that it allows
346 // sanitizer arguments on the command line if they are supported by the host
347 // toolchain. The HIPToolChain will actually ignore any command line
348 // arguments for any of these "supported" sanitizers. That means that no
349 // sanitization of device code is actually supported at this time.
350 //
351 // This behavior is necessary because the host and device toolchains
352 // invocations often share the command line, so the device toolchain must
353 // tolerate flags meant only for the host toolchain.
354 return HostTC.getSupportedSanitizers();
355 }
356
computeMSVCVersion(const Driver * D,const ArgList & Args) const357 VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
358 const ArgList &Args) const {
359 return HostTC.computeMSVCVersion(D, Args);
360 }
361
362 llvm::SmallVector<std::string, 12>
getHIPDeviceLibs(const llvm::opt::ArgList & DriverArgs) const363 HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
364 llvm::SmallVector<std::string, 12> BCLibs;
365 if (DriverArgs.hasArg(options::OPT_nogpulib))
366 return {};
367 ArgStringList LibraryPaths;
368
369 // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
370 for (auto Path : RocmInstallation.getRocmDeviceLibPathArg())
371 LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
372
373 addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH");
374
375 // Maintain compatability with --hip-device-lib.
376 auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ);
377 if (!BCLibArgs.empty()) {
378 llvm::for_each(BCLibArgs, [&](StringRef BCName) {
379 StringRef FullName;
380 for (std::string LibraryPath : LibraryPaths) {
381 SmallString<128> Path(LibraryPath);
382 llvm::sys::path::append(Path, BCName);
383 FullName = Path;
384 if (llvm::sys::fs::exists(FullName)) {
385 BCLibs.push_back(FullName.str());
386 return;
387 }
388 }
389 getDriver().Diag(diag::err_drv_no_such_file) << BCName;
390 });
391 } else {
392 if (!RocmInstallation.hasDeviceLibrary()) {
393 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
394 return {};
395 }
396 StringRef GpuArch = getGPUArch(DriverArgs);
397 assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
398
399 // If --hip-device-lib is not set, add the default bitcode libraries.
400 if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
401 options::OPT_fno_gpu_sanitize, false)) {
402 auto AsanRTL = RocmInstallation.getAsanRTLPath();
403 if (AsanRTL.empty()) {
404 unsigned DiagID = getDriver().getDiags().getCustomDiagID(
405 DiagnosticsEngine::Error,
406 "AMDGPU address sanitizer runtime library (asanrtl) is not found. "
407 "Please install ROCm device library which supports address "
408 "sanitizer");
409 getDriver().Diag(DiagID);
410 return {};
411 } else
412 BCLibs.push_back(AsanRTL.str());
413 }
414
415 // Add the HIP specific bitcode library.
416 BCLibs.push_back(RocmInstallation.getHIPPath().str());
417
418 // Add common device libraries like ocml etc.
419 BCLibs.append(getCommonDeviceLibNames(DriverArgs, GpuArch.str()));
420
421 // Add instrument lib.
422 auto InstLib =
423 DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ);
424 if (InstLib.empty())
425 return BCLibs;
426 if (llvm::sys::fs::exists(InstLib))
427 BCLibs.push_back(InstLib.str());
428 else
429 getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
430 }
431
432 return BCLibs;
433 }
434
checkTargetID(const llvm::opt::ArgList & DriverArgs) const435 void HIPToolChain::checkTargetID(const llvm::opt::ArgList &DriverArgs) const {
436 auto PTID = getParsedTargetID(DriverArgs);
437 if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
438 getDriver().Diag(clang::diag::err_drv_bad_target_id)
439 << PTID.OptionalTargetID.getValue();
440 return;
441 }
442
443 assert(PTID.OptionalFeatures && "Invalid return from getParsedTargetID");
444 auto &FeatureMap = PTID.OptionalFeatures.getValue();
445 // Sanitizer is not supported with xnack-.
446 if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
447 options::OPT_fno_gpu_sanitize, false)) {
448 auto Loc = FeatureMap.find("xnack");
449 if (Loc != FeatureMap.end() && !Loc->second) {
450 auto &Diags = getDriver().getDiags();
451 auto DiagID = Diags.getCustomDiagID(
452 DiagnosticsEngine::Error,
453 "'-fgpu-sanitize' is not compatible with offload arch '%0'. "
454 "Use an offload arch without 'xnack-' instead");
455 Diags.Report(DiagID) << PTID.OptionalTargetID.getValue();
456 }
457 }
458 }
459