1 //===-- clang-offload-bundler/ClangOffloadBundler.cpp ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a clang-offload-bundler that bundles different
11 /// files that relate with the same source code but different targets into a
/// single one. It also implements the opposite functionality, i.e. unbundling
/// files previously created by this tool.
14 ///
15 //===----------------------------------------------------------------------===//
16 
#include "clang/Basic/Version.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <forward_list>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
48 
49 using namespace llvm;
50 using namespace llvm::object;
51 
// Hidden short option "-h"; its description marks it as an alias for -help.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);

// Mark all our options with this category, everything else (except for -version
// and -help) will be hidden.
static cl::OptionCategory
    ClangOffloadBundlerCategory("clang-offload-bundler options");

// Comma-separated list of input file names; at least one is required.
static cl::list<std::string>
    InputFileNames("inputs", cl::CommaSeparated, cl::OneOrMore,
                   cl::desc("[<input file>,...]"),
                   cl::cat(ClangOffloadBundlerCategory));
// Comma-separated list of output file names; at least one is required.
static cl::list<std::string>
    OutputFileNames("outputs", cl::CommaSeparated, cl::OneOrMore,
                    cl::desc("[<output file>,...]"),
                    cl::cat(ClangOffloadBundlerCategory));
// Comma-separated list of bundle names, each spelled as
// <offload kind>-<target triple>; at least one is required.
static cl::list<std::string>
    TargetNames("targets", cl::CommaSeparated, cl::OneOrMore,
                cl::desc("[<offload kind>-<target triple>,...]"),
                cl::cat(ClangOffloadBundlerCategory));
// Mandatory file type selector. CreateFileHandler maps each supported value to
// the handler that understands that kind of file.
static cl::opt<std::string>
    FilesType("type", cl::Required,
              cl::desc("Type of the files to be bundled/unbundled.\n"
                       "Current supported types are:\n"
                       "  i   - cpp-output\n"
                       "  ii  - c++-cpp-output\n"
                       "  cui - cuda/hip-output\n"
                       "  d   - dependency\n"
                       "  ll  - llvm\n"
                       "  bc  - llvm-bc\n"
                       "  s   - assembler\n"
                       "  o   - object\n"
                       "  gch - precompiled-header\n"
                       "  ast - clang AST file"),
              cl::cat(ClangOffloadBundlerCategory));
// Requests unbundling of a bundled file; off by default.
static cl::opt<bool>
    Unbundle("unbundle",
             cl::desc("Unbundle bundled file into several output files.\n"),
             cl::init(false), cl::cat(ClangOffloadBundlerCategory));

// Testing aid, off by default: when set, the llvm-objcopy command lines are
// printed to the error stream instead of being executed.
static cl::opt<bool> PrintExternalCommands(
    "###",
    cl::desc("Print any external commands that are to be executed "
             "instead of actually executing them - for testing purposes.\n"),
    cl::init(false), cl::cat(ClangOffloadBundlerCategory));
96 
/// Magic string that marks the existence of offloading data. It opens the
/// header of binary bundles, prefixes the names of offload sections in object
/// files, and is part of the start/end marker lines of text bundles.
#define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"

/// The index of the host input in the list of inputs. The initial value ~0u
/// stands for "not defined"; the bundling code asserts against it before use.
static unsigned HostInputIndex = ~0u;

/// Path to the current binary. Its parent directory is the first location
/// searched when looking for llvm-objcopy.
static std::string BundlerExecutable;
105 
106 /// Obtain the offload kind and real machine triple out of the target
107 /// information specified by the user.
108 static void getOffloadKindAndTriple(StringRef Target, StringRef &OffloadKind,
109                                     StringRef &Triple) {
110   auto KindTriplePair = Target.split('-');
111   OffloadKind = KindTriplePair.first;
112   Triple = KindTriplePair.second;
113 }
114 static bool hasHostKind(StringRef Target) {
115   StringRef OffloadKind;
116   StringRef Triple;
117   getOffloadKindAndTriple(Target, OffloadKind, Triple);
118   return OffloadKind == "host";
119 }
120 
121 /// Generic file handler interface.
122 class FileHandler {
123 public:
124   FileHandler() {}
125 
126   virtual ~FileHandler() {}
127 
128   /// Update the file handler with information from the header of the bundled
129   /// file.
130   virtual Error ReadHeader(MemoryBuffer &Input) = 0;
131 
132   /// Read the marker of the next bundled to be read in the file. The bundle
133   /// name is returned if there is one in the file, or `None` if there are no
134   /// more bundles to be read.
135   virtual Expected<Optional<StringRef>>
136   ReadBundleStart(MemoryBuffer &Input) = 0;
137 
138   /// Read the marker that closes the current bundle.
139   virtual Error ReadBundleEnd(MemoryBuffer &Input) = 0;
140 
141   /// Read the current bundle and write the result into the stream \a OS.
142   virtual Error ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0;
143 
144   /// Write the header of the bundled file to \a OS based on the information
145   /// gathered from \a Inputs.
146   virtual Error WriteHeader(raw_fd_ostream &OS,
147                             ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) = 0;
148 
149   /// Write the marker that initiates a bundle for the triple \a TargetTriple to
150   /// \a OS.
151   virtual Error WriteBundleStart(raw_fd_ostream &OS,
152                                  StringRef TargetTriple) = 0;
153 
154   /// Write the marker that closes a bundle for the triple \a TargetTriple to \a
155   /// OS.
156   virtual Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0;
157 
158   /// Write the bundle from \a Input into \a OS.
159   virtual Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0;
160 };
161 
162 /// Handler for binary files. The bundled file will have the following format
163 /// (all integers are stored in little-endian format):
164 ///
165 /// "OFFLOAD_BUNDLER_MAGIC_STR" (ASCII encoding of the string)
166 ///
167 /// NumberOfOffloadBundles (8-byte integer)
168 ///
169 /// OffsetOfBundle1 (8-byte integer)
170 /// SizeOfBundle1 (8-byte integer)
171 /// NumberOfBytesInTripleOfBundle1 (8-byte integer)
172 /// TripleOfBundle1 (byte length defined before)
173 ///
174 /// ...
175 ///
176 /// OffsetOfBundleN (8-byte integer)
177 /// SizeOfBundleN (8-byte integer)
178 /// NumberOfBytesInTripleOfBundleN (8-byte integer)
179 /// TripleOfBundleN (byte length defined before)
180 ///
181 /// Bundle1
182 /// ...
183 /// BundleN
184 
185 /// Read 8-byte integers from a buffer in little-endian format.
186 static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) {
187   uint64_t Res = 0;
188   const char *Data = Buffer.data();
189 
190   for (unsigned i = 0; i < 8; ++i) {
191     Res <<= 8;
192     uint64_t Char = (uint64_t)Data[pos + 7 - i];
193     Res |= 0xffu & Char;
194   }
195   return Res;
196 }
197 
198 /// Write 8-byte integers to a buffer in little-endian format.
199 static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) {
200   for (unsigned i = 0; i < 8; ++i) {
201     char Char = (char)(Val & 0xffu);
202     OS.write(&Char, 1);
203     Val >>= 8;
204   }
205 }
206 
207 class BinaryFileHandler final : public FileHandler {
208   /// Information about the bundles extracted from the header.
209   struct BundleInfo final {
210     /// Size of the bundle.
211     uint64_t Size = 0u;
212     /// Offset at which the bundle starts in the bundled file.
213     uint64_t Offset = 0u;
214 
215     BundleInfo() {}
216     BundleInfo(uint64_t Size, uint64_t Offset) : Size(Size), Offset(Offset) {}
217   };
218 
219   /// Map between a triple and the corresponding bundle information.
220   StringMap<BundleInfo> BundlesInfo;
221 
222   /// Iterator for the bundle information that is being read.
223   StringMap<BundleInfo>::iterator CurBundleInfo;
224   StringMap<BundleInfo>::iterator NextBundleInfo;
225 
226 public:
227   BinaryFileHandler() : FileHandler() {}
228 
229   ~BinaryFileHandler() final {}
230 
231   Error ReadHeader(MemoryBuffer &Input) final {
232     StringRef FC = Input.getBuffer();
233 
234     // Initialize the current bundle with the end of the container.
235     CurBundleInfo = BundlesInfo.end();
236 
237     // Check if buffer is smaller than magic string.
238     size_t ReadChars = sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1;
239     if (ReadChars > FC.size())
240       return Error::success();
241 
242     // Check if no magic was found.
243     StringRef Magic(FC.data(), sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
244     if (!Magic.equals(OFFLOAD_BUNDLER_MAGIC_STR))
245       return Error::success();
246 
247     // Read number of bundles.
248     if (ReadChars + 8 > FC.size())
249       return Error::success();
250 
251     uint64_t NumberOfBundles = Read8byteIntegerFromBuffer(FC, ReadChars);
252     ReadChars += 8;
253 
254     // Read bundle offsets, sizes and triples.
255     for (uint64_t i = 0; i < NumberOfBundles; ++i) {
256 
257       // Read offset.
258       if (ReadChars + 8 > FC.size())
259         return Error::success();
260 
261       uint64_t Offset = Read8byteIntegerFromBuffer(FC, ReadChars);
262       ReadChars += 8;
263 
264       // Read size.
265       if (ReadChars + 8 > FC.size())
266         return Error::success();
267 
268       uint64_t Size = Read8byteIntegerFromBuffer(FC, ReadChars);
269       ReadChars += 8;
270 
271       // Read triple size.
272       if (ReadChars + 8 > FC.size())
273         return Error::success();
274 
275       uint64_t TripleSize = Read8byteIntegerFromBuffer(FC, ReadChars);
276       ReadChars += 8;
277 
278       // Read triple.
279       if (ReadChars + TripleSize > FC.size())
280         return Error::success();
281 
282       StringRef Triple(&FC.data()[ReadChars], TripleSize);
283       ReadChars += TripleSize;
284 
285       // Check if the offset and size make sense.
286       if (!Offset || Offset + Size > FC.size())
287         return Error::success();
288 
289       assert(BundlesInfo.find(Triple) == BundlesInfo.end() &&
290              "Triple is duplicated??");
291       BundlesInfo[Triple] = BundleInfo(Size, Offset);
292     }
293     // Set the iterator to where we will start to read.
294     CurBundleInfo = BundlesInfo.end();
295     NextBundleInfo = BundlesInfo.begin();
296     return Error::success();
297   }
298 
299   Expected<Optional<StringRef>> ReadBundleStart(MemoryBuffer &Input) final {
300     if (NextBundleInfo == BundlesInfo.end())
301       return None;
302     CurBundleInfo = NextBundleInfo++;
303     return CurBundleInfo->first();
304   }
305 
306   Error ReadBundleEnd(MemoryBuffer &Input) final {
307     assert(CurBundleInfo != BundlesInfo.end() && "Invalid reader info!");
308     return Error::success();
309   }
310 
311   Error ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
312     assert(CurBundleInfo != BundlesInfo.end() && "Invalid reader info!");
313     StringRef FC = Input.getBuffer();
314     OS.write(FC.data() + CurBundleInfo->second.Offset,
315              CurBundleInfo->second.Size);
316     return Error::success();
317   }
318 
319   Error WriteHeader(raw_fd_ostream &OS,
320                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
321     // Compute size of the header.
322     uint64_t HeaderSize = 0;
323 
324     HeaderSize += sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1;
325     HeaderSize += 8; // Number of Bundles
326 
327     for (auto &T : TargetNames) {
328       HeaderSize += 3 * 8; // Bundle offset, Size of bundle and size of triple.
329       HeaderSize += T.size(); // The triple.
330     }
331 
332     // Write to the buffer the header.
333     OS << OFFLOAD_BUNDLER_MAGIC_STR;
334 
335     Write8byteIntegerToBuffer(OS, TargetNames.size());
336 
337     unsigned Idx = 0;
338     for (auto &T : TargetNames) {
339       MemoryBuffer &MB = *Inputs[Idx++];
340       // Bundle offset.
341       Write8byteIntegerToBuffer(OS, HeaderSize);
342       // Size of the bundle (adds to the next bundle's offset)
343       Write8byteIntegerToBuffer(OS, MB.getBufferSize());
344       HeaderSize += MB.getBufferSize();
345       // Size of the triple
346       Write8byteIntegerToBuffer(OS, T.size());
347       // Triple
348       OS << T;
349     }
350     return Error::success();
351   }
352 
353   Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
354     return Error::success();
355   }
356 
357   Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
358     return Error::success();
359   }
360 
361   Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
362     OS.write(Input.getBufferStart(), Input.getBufferSize());
363     return Error::success();
364   }
365 };
366 
367 namespace {
368 
369 // This class implements a list of temporary files that are removed upon
370 // object destruction.
371 class TempFileHandlerRAII {
372 public:
373   ~TempFileHandlerRAII() {
374     for (const auto &File : Files)
375       sys::fs::remove(File);
376   }
377 
378   // Creates temporary file with given contents.
379   Expected<StringRef> Create(Optional<ArrayRef<char>> Contents) {
380     SmallString<128u> File;
381     if (std::error_code EC =
382             sys::fs::createTemporaryFile("clang-offload-bundler", "tmp", File))
383       return createFileError(File, EC);
384     Files.push_back(File);
385 
386     if (Contents) {
387       std::error_code EC;
388       raw_fd_ostream OS(File, EC);
389       if (EC)
390         return createFileError(File, EC);
391       OS.write(Contents->data(), Contents->size());
392     }
393     return Files.back();
394   }
395 
396 private:
397   SmallVector<SmallString<128u>, 4u> Files;
398 };
399 
400 } // end anonymous namespace
401 
402 /// Handler for object files. The bundles are organized by sections with a
403 /// designated name.
404 ///
405 /// To unbundle, we just copy the contents of the designated section.
406 class ObjectFileHandler final : public FileHandler {
407 
408   /// The object file we are currently dealing with.
409   std::unique_ptr<ObjectFile> Obj;
410 
411   /// Return the input file contents.
412   StringRef getInputFileContents() const { return Obj->getData(); }
413 
414   /// Return bundle name (<kind>-<triple>) if the provided section is an offload
415   /// section.
416   static Expected<Optional<StringRef>> IsOffloadSection(SectionRef CurSection) {
417     Expected<StringRef> NameOrErr = CurSection.getName();
418     if (!NameOrErr)
419       return NameOrErr.takeError();
420 
421     // If it does not start with the reserved suffix, just skip this section.
422     if (!NameOrErr->startswith(OFFLOAD_BUNDLER_MAGIC_STR))
423       return None;
424 
425     // Return the triple that is right after the reserved prefix.
426     return NameOrErr->substr(sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
427   }
428 
429   /// Total number of inputs.
430   unsigned NumberOfInputs = 0;
431 
432   /// Total number of processed inputs, i.e, inputs that were already
433   /// read from the buffers.
434   unsigned NumberOfProcessedInputs = 0;
435 
436   /// Iterator of the current and next section.
437   section_iterator CurrentSection;
438   section_iterator NextSection;
439 
440 public:
441   ObjectFileHandler(std::unique_ptr<ObjectFile> ObjIn)
442       : FileHandler(), Obj(std::move(ObjIn)),
443         CurrentSection(Obj->section_begin()),
444         NextSection(Obj->section_begin()) {}
445 
446   ~ObjectFileHandler() final {}
447 
448   Error ReadHeader(MemoryBuffer &Input) final { return Error::success(); }
449 
450   Expected<Optional<StringRef>> ReadBundleStart(MemoryBuffer &Input) final {
451     while (NextSection != Obj->section_end()) {
452       CurrentSection = NextSection;
453       ++NextSection;
454 
455       // Check if the current section name starts with the reserved prefix. If
456       // so, return the triple.
457       Expected<Optional<StringRef>> TripleOrErr =
458           IsOffloadSection(*CurrentSection);
459       if (!TripleOrErr)
460         return TripleOrErr.takeError();
461       if (*TripleOrErr)
462         return **TripleOrErr;
463     }
464     return None;
465   }
466 
467   Error ReadBundleEnd(MemoryBuffer &Input) final { return Error::success(); }
468 
469   Error ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
470     Expected<StringRef> ContentOrErr = CurrentSection->getContents();
471     if (!ContentOrErr)
472       return ContentOrErr.takeError();
473     StringRef Content = *ContentOrErr;
474 
475     // Copy fat object contents to the output when extracting host bundle.
476     if (Content.size() == 1u && Content.front() == 0)
477       Content = StringRef(Input.getBufferStart(), Input.getBufferSize());
478 
479     OS.write(Content.data(), Content.size());
480     return Error::success();
481   }
482 
483   Error WriteHeader(raw_fd_ostream &OS,
484                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
485     assert(HostInputIndex != ~0u && "Host input index not defined.");
486 
487     // Record number of inputs.
488     NumberOfInputs = Inputs.size();
489     return Error::success();
490   }
491 
492   Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
493     ++NumberOfProcessedInputs;
494     return Error::success();
495   }
496 
497   Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
498     assert(NumberOfProcessedInputs <= NumberOfInputs &&
499            "Processing more inputs that actually exist!");
500     assert(HostInputIndex != ~0u && "Host input index not defined.");
501 
502     // If this is not the last output, we don't have to do anything.
503     if (NumberOfProcessedInputs != NumberOfInputs)
504       return Error::success();
505 
506     // We will use llvm-objcopy to add target objects sections to the output
507     // fat object. These sections should have 'exclude' flag set which tells
508     // link editor to remove them from linker inputs when linking executable or
509     // shared library. llvm-objcopy currently does not support adding new
510     // section and changing flags for the added section in one invocation, and
511     // because of that we have to run it two times. First run adds sections and
512     // the second changes flags.
513     // TODO: change it to one run once llvm-objcopy starts supporting that.
514 
515     // Find llvm-objcopy in order to create the bundle binary.
516     ErrorOr<std::string> Objcopy = sys::findProgramByName(
517         "llvm-objcopy", sys::path::parent_path(BundlerExecutable));
518     if (!Objcopy)
519       Objcopy = sys::findProgramByName("llvm-objcopy");
520     if (!Objcopy)
521       return createStringError(Objcopy.getError(),
522                                "unable to find 'llvm-objcopy' in path");
523 
524     // We write to the output file directly. So, we close it and use the name
525     // to pass down to llvm-objcopy.
526     OS.close();
527 
528     // Temporary files that need to be removed.
529     TempFileHandlerRAII TempFiles;
530 
531     // Create an intermediate temporary file to save object after the first
532     // llvm-objcopy run.
533     Expected<StringRef> IntermediateObjOrErr = TempFiles.Create(None);
534     if (!IntermediateObjOrErr)
535       return IntermediateObjOrErr.takeError();
536     StringRef IntermediateObj = *IntermediateObjOrErr;
537 
538     // Compose llvm-objcopy command line for add target objects' sections.
539     BumpPtrAllocator Alloc;
540     StringSaver SS{Alloc};
541     SmallVector<StringRef, 8u> ObjcopyArgs{"llvm-objcopy"};
542     for (unsigned I = 0; I < NumberOfInputs; ++I) {
543       StringRef InputFile = InputFileNames[I];
544       if (I == HostInputIndex) {
545         // Special handling for the host bundle. We do not need to add a
546         // standard bundle for the host object since we are going to use fat
547         // object as a host object. Therefore use dummy contents (one zero byte)
548         // when creating section for the host bundle.
549         Expected<StringRef> TempFileOrErr = TempFiles.Create(ArrayRef<char>(0));
550         if (!TempFileOrErr)
551           return TempFileOrErr.takeError();
552         InputFile = *TempFileOrErr;
553       }
554 
555       ObjcopyArgs.push_back(SS.save(Twine("--add-section=") +
556                                     OFFLOAD_BUNDLER_MAGIC_STR + TargetNames[I] +
557                                     "=" + InputFile));
558     }
559     ObjcopyArgs.push_back(InputFileNames[HostInputIndex]);
560     ObjcopyArgs.push_back(IntermediateObj);
561 
562     if (Error Err = executeObjcopy(*Objcopy, ObjcopyArgs))
563       return Err;
564 
565     // And run llvm-objcopy for the second time to update section flags.
566     ObjcopyArgs.resize(1);
567     for (unsigned I = 0; I < NumberOfInputs; ++I)
568       ObjcopyArgs.push_back(SS.save(Twine("--set-section-flags=") +
569                                     OFFLOAD_BUNDLER_MAGIC_STR + TargetNames[I] +
570                                     "=readonly,exclude"));
571     ObjcopyArgs.push_back(IntermediateObj);
572     ObjcopyArgs.push_back(OutputFileNames.front());
573 
574     if (Error Err = executeObjcopy(*Objcopy, ObjcopyArgs))
575       return Err;
576 
577     return Error::success();
578   }
579 
580   Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
581     return Error::success();
582   }
583 
584 private:
585   static Error executeObjcopy(StringRef Objcopy, ArrayRef<StringRef> Args) {
586     // If the user asked for the commands to be printed out, we do that
587     // instead of executing it.
588     if (PrintExternalCommands) {
589       errs() << "\"" << Objcopy << "\"";
590       for (StringRef Arg : drop_begin(Args, 1))
591         errs() << " \"" << Arg << "\"";
592       errs() << "\n";
593     } else {
594       if (sys::ExecuteAndWait(Objcopy, Args))
595         return createStringError(inconvertibleErrorCode(),
596                                  "'llvm-objcopy' tool failed");
597     }
598     return Error::success();
599   }
600 };
601 
602 /// Handler for text files. The bundled file will have the following format.
603 ///
604 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__START__ triple"
605 /// Bundle 1
606 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__END__ triple"
607 /// ...
608 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__START__ triple"
609 /// Bundle N
610 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__END__ triple"
611 class TextFileHandler final : public FileHandler {
612   /// String that begins a line comment.
613   StringRef Comment;
614 
615   /// String that initiates a bundle.
616   std::string BundleStartString;
617 
618   /// String that closes a bundle.
619   std::string BundleEndString;
620 
621   /// Number of chars read from input.
622   size_t ReadChars = 0u;
623 
624 protected:
625   Error ReadHeader(MemoryBuffer &Input) final { return Error::success(); }
626 
627   Expected<Optional<StringRef>> ReadBundleStart(MemoryBuffer &Input) final {
628     StringRef FC = Input.getBuffer();
629 
630     // Find start of the bundle.
631     ReadChars = FC.find(BundleStartString, ReadChars);
632     if (ReadChars == FC.npos)
633       return None;
634 
635     // Get position of the triple.
636     size_t TripleStart = ReadChars = ReadChars + BundleStartString.size();
637 
638     // Get position that closes the triple.
639     size_t TripleEnd = ReadChars = FC.find("\n", ReadChars);
640     if (TripleEnd == FC.npos)
641       return None;
642 
643     // Next time we read after the new line.
644     ++ReadChars;
645 
646     return StringRef(&FC.data()[TripleStart], TripleEnd - TripleStart);
647   }
648 
649   Error ReadBundleEnd(MemoryBuffer &Input) final {
650     StringRef FC = Input.getBuffer();
651 
652     // Read up to the next new line.
653     assert(FC[ReadChars] == '\n' && "The bundle should end with a new line.");
654 
655     size_t TripleEnd = ReadChars = FC.find("\n", ReadChars + 1);
656     if (TripleEnd != FC.npos)
657       // Next time we read after the new line.
658       ++ReadChars;
659 
660     return Error::success();
661   }
662 
663   Error ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
664     StringRef FC = Input.getBuffer();
665     size_t BundleStart = ReadChars;
666 
667     // Find end of the bundle.
668     size_t BundleEnd = ReadChars = FC.find(BundleEndString, ReadChars);
669 
670     StringRef Bundle(&FC.data()[BundleStart], BundleEnd - BundleStart);
671     OS << Bundle;
672 
673     return Error::success();
674   }
675 
676   Error WriteHeader(raw_fd_ostream &OS,
677                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
678     return Error::success();
679   }
680 
681   Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
682     OS << BundleStartString << TargetTriple << "\n";
683     return Error::success();
684   }
685 
686   Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
687     OS << BundleEndString << TargetTriple << "\n";
688     return Error::success();
689   }
690 
691   Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
692     OS << Input.getBuffer();
693     return Error::success();
694   }
695 
696 public:
697   TextFileHandler(StringRef Comment)
698       : FileHandler(), Comment(Comment), ReadChars(0) {
699     BundleStartString =
700         "\n" + Comment.str() + " " OFFLOAD_BUNDLER_MAGIC_STR "__START__ ";
701     BundleEndString =
702         "\n" + Comment.str() + " " OFFLOAD_BUNDLER_MAGIC_STR "__END__ ";
703   }
704 };
705 
706 /// Return an appropriate object file handler. We use the specific object
707 /// handler if we know how to deal with that format, otherwise we use a default
708 /// binary file handler.
709 static std::unique_ptr<FileHandler>
710 CreateObjectFileHandler(MemoryBuffer &FirstInput) {
711   // Check if the input file format is one that we know how to deal with.
712   Expected<std::unique_ptr<Binary>> BinaryOrErr = createBinary(FirstInput);
713 
714   // We only support regular object files. If failed to open the input as a
715   // known binary or this is not an object file use the default binary handler.
716   if (errorToBool(BinaryOrErr.takeError()) || !isa<ObjectFile>(*BinaryOrErr))
717     return std::make_unique<BinaryFileHandler>();
718 
719   // Otherwise create an object file handler. The handler will be owned by the
720   // client of this function.
721   return std::make_unique<ObjectFileHandler>(
722       std::unique_ptr<ObjectFile>(cast<ObjectFile>(BinaryOrErr->release())));
723 }
724 
725 /// Return an appropriate handler given the input files and options.
726 static Expected<std::unique_ptr<FileHandler>>
727 CreateFileHandler(MemoryBuffer &FirstInput) {
728   if (FilesType == "i")
729     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
730   if (FilesType == "ii")
731     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
732   if (FilesType == "cui")
733     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
734   // TODO: `.d` should be eventually removed once `-M` and its variants are
735   // handled properly in offload compilation.
736   if (FilesType == "d")
737     return std::make_unique<TextFileHandler>(/*Comment=*/"#");
738   if (FilesType == "ll")
739     return std::make_unique<TextFileHandler>(/*Comment=*/";");
740   if (FilesType == "bc")
741     return std::make_unique<BinaryFileHandler>();
742   if (FilesType == "s")
743     return std::make_unique<TextFileHandler>(/*Comment=*/"#");
744   if (FilesType == "o")
745     return CreateObjectFileHandler(FirstInput);
746   if (FilesType == "gch")
747     return std::make_unique<BinaryFileHandler>();
748   if (FilesType == "ast")
749     return std::make_unique<BinaryFileHandler>();
750 
751   return createStringError(errc::invalid_argument,
752                            "'" + FilesType + "': invalid file type specified");
753 }
754 
755 /// Bundle the files. Return true if an error was found.
756 static Error BundleFiles() {
757   std::error_code EC;
758 
759   // Create output file.
760   raw_fd_ostream OutputFile(OutputFileNames.front(), EC, sys::fs::OF_None);
761   if (EC)
762     return createFileError(OutputFileNames.front(), EC);
763 
764   // Open input files.
765   SmallVector<std::unique_ptr<MemoryBuffer>, 8u> InputBuffers;
766   InputBuffers.reserve(InputFileNames.size());
767   for (auto &I : InputFileNames) {
768     ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
769         MemoryBuffer::getFileOrSTDIN(I);
770     if (std::error_code EC = CodeOrErr.getError())
771       return createFileError(I, EC);
772     InputBuffers.emplace_back(std::move(*CodeOrErr));
773   }
774 
775   // Get the file handler. We use the host buffer as reference.
776   assert(HostInputIndex != ~0u && "Host input index undefined??");
777   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
778       CreateFileHandler(*InputBuffers[HostInputIndex]);
779   if (!FileHandlerOrErr)
780     return FileHandlerOrErr.takeError();
781 
782   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
783   assert(FH);
784 
785   // Write header.
786   if (Error Err = FH->WriteHeader(OutputFile, InputBuffers))
787     return Err;
788 
789   // Write all bundles along with the start/end markers. If an error was found
790   // writing the end of the bundle component, abort the bundle writing.
791   auto Input = InputBuffers.begin();
792   for (auto &Triple : TargetNames) {
793     if (Error Err = FH->WriteBundleStart(OutputFile, Triple))
794       return Err;
795     if (Error Err = FH->WriteBundle(OutputFile, **Input))
796       return Err;
797     if (Error Err = FH->WriteBundleEnd(OutputFile, Triple))
798       return Err;
799     ++Input;
800   }
801   return Error::success();
802 }
803 
804 // Unbundle the files. Return true if an error was found.
805 static Error UnbundleFiles() {
806   // Open Input file.
807   ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
808       MemoryBuffer::getFileOrSTDIN(InputFileNames.front());
809   if (std::error_code EC = CodeOrErr.getError())
810     return createFileError(InputFileNames.front(), EC);
811 
812   MemoryBuffer &Input = **CodeOrErr;
813 
814   // Select the right files handler.
815   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
816       CreateFileHandler(Input);
817   if (!FileHandlerOrErr)
818     return FileHandlerOrErr.takeError();
819 
820   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
821   assert(FH);
822 
823   // Read the header of the bundled file.
824   if (Error Err = FH->ReadHeader(Input))
825     return Err;
826 
827   // Create a work list that consist of the map triple/output file.
828   StringMap<StringRef> Worklist;
829   auto Output = OutputFileNames.begin();
830   for (auto &Triple : TargetNames) {
831     Worklist[Triple] = *Output;
832     ++Output;
833   }
834 
835   // Read all the bundles that are in the work list. If we find no bundles we
836   // assume the file is meant for the host target.
837   bool FoundHostBundle = false;
838   while (!Worklist.empty()) {
839     Expected<Optional<StringRef>> CurTripleOrErr = FH->ReadBundleStart(Input);
840     if (!CurTripleOrErr)
841       return CurTripleOrErr.takeError();
842 
843     // We don't have more bundles.
844     if (!*CurTripleOrErr)
845       break;
846 
847     StringRef CurTriple = **CurTripleOrErr;
848     assert(!CurTriple.empty());
849 
850     auto Output = Worklist.find(CurTriple);
851     // The file may have more bundles for other targets, that we don't care
852     // about. Therefore, move on to the next triple
853     if (Output == Worklist.end())
854       continue;
855 
856     // Check if the output file can be opened and copy the bundle to it.
857     std::error_code EC;
858     raw_fd_ostream OutputFile(Output->second, EC, sys::fs::OF_None);
859     if (EC)
860       return createFileError(Output->second, EC);
861     if (Error Err = FH->ReadBundle(OutputFile, Input))
862       return Err;
863     if (Error Err = FH->ReadBundleEnd(Input))
864       return Err;
865     Worklist.erase(Output);
866 
867     // Record if we found the host bundle.
868     if (hasHostKind(CurTriple))
869       FoundHostBundle = true;
870   }
871 
872   // If no bundles were found, assume the input file is the host bundle and
873   // create empty files for the remaining targets.
874   if (Worklist.size() == TargetNames.size()) {
875     for (auto &E : Worklist) {
876       std::error_code EC;
877       raw_fd_ostream OutputFile(E.second, EC, sys::fs::OF_None);
878       if (EC)
879         return createFileError(E.second, EC);
880 
881       // If this entry has a host kind, copy the input file to the output file.
882       if (hasHostKind(E.first()))
883         OutputFile.write(Input.getBufferStart(), Input.getBufferSize());
884     }
885     return Error::success();
886   }
887 
888   // If we found elements, we emit an error if none of those were for the host
889   // in case host bundle name was provided in command line.
890   if (!FoundHostBundle && HostInputIndex != ~0u)
891     return createStringError(inconvertibleErrorCode(),
892                              "Can't find bundle for the host target");
893 
894   // If we still have any elements in the worklist, create empty files for them.
895   for (auto &E : Worklist) {
896     std::error_code EC;
897     raw_fd_ostream OutputFile(E.second, EC, sys::fs::OF_None);
898     if (EC)
899       return createFileError(E.second, EC);
900   }
901 
902   return Error::success();
903 }
904 
905 static void PrintVersion(raw_ostream &OS) {
906   OS << clang::getClangToolFullVersion("clang-offload-bundler") << '\n';
907 }
908 
909 int main(int argc, const char **argv) {
910   sys::PrintStackTraceOnErrorSignal(argv[0]);
911 
912   cl::HideUnrelatedOptions(ClangOffloadBundlerCategory);
913   cl::SetVersionPrinter(PrintVersion);
914   cl::ParseCommandLineOptions(
915       argc, argv,
916       "A tool to bundle several input files of the specified type <type> \n"
917       "referring to the same source file but different targets into a single \n"
918       "one. The resulting file can also be unbundled into different files by \n"
919       "this tool if -unbundle is provided.\n");
920 
921   if (Help) {
922     cl::PrintHelpMessage();
923     return 0;
924   }
925 
926   auto reportError = [argv](Error E) {
927     logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
928   };
929 
930   bool Error = false;
931   if (Unbundle) {
932     if (InputFileNames.size() != 1) {
933       Error = true;
934       reportError(createStringError(
935           errc::invalid_argument,
936           "only one input file supported in unbundling mode"));
937     }
938     if (OutputFileNames.size() != TargetNames.size()) {
939       Error = true;
940       reportError(createStringError(errc::invalid_argument,
941                                     "number of output files and targets should "
942                                     "match in unbundling mode"));
943     }
944   } else {
945     if (OutputFileNames.size() != 1) {
946       Error = true;
947       reportError(createStringError(
948           errc::invalid_argument,
949           "only one output file supported in bundling mode"));
950     }
951     if (InputFileNames.size() != TargetNames.size()) {
952       Error = true;
953       reportError(createStringError(
954           errc::invalid_argument,
955           "number of input files and targets should match in bundling mode"));
956     }
957   }
958 
959   // Verify that the offload kinds and triples are known. We also check that we
960   // have exactly one host target.
961   unsigned Index = 0u;
962   unsigned HostTargetNum = 0u;
963   for (StringRef Target : TargetNames) {
964     StringRef Kind;
965     StringRef Triple;
966     getOffloadKindAndTriple(Target, Kind, Triple);
967 
968     bool KindIsValid = !Kind.empty();
969     KindIsValid = KindIsValid && StringSwitch<bool>(Kind)
970                                      .Case("host", true)
971                                      .Case("openmp", true)
972                                      .Case("hip", true)
973                                      .Default(false);
974 
975     bool TripleIsValid = !Triple.empty();
976     llvm::Triple T(Triple);
977     TripleIsValid &= T.getArch() != Triple::UnknownArch;
978 
979     if (!KindIsValid || !TripleIsValid) {
980       Error = true;
981 
982       SmallVector<char, 128u> Buf;
983       raw_svector_ostream Msg(Buf);
984       Msg << "invalid target '" << Target << "'";
985       if (!KindIsValid)
986         Msg << ", unknown offloading kind '" << Kind << "'";
987       if (!TripleIsValid)
988         Msg << ", unknown target triple '" << Triple << "'";
989       reportError(createStringError(errc::invalid_argument, Msg.str()));
990     }
991 
992     if (KindIsValid && Kind == "host") {
993       ++HostTargetNum;
994       // Save the index of the input that refers to the host.
995       HostInputIndex = Index;
996     }
997 
998     ++Index;
999   }
1000 
1001   // Host triple is not really needed for unbundling operation, so do not
1002   // treat missing host triple as error if we do unbundling.
1003   if ((Unbundle && HostTargetNum > 1) || (!Unbundle && HostTargetNum != 1)) {
1004     Error = true;
1005     reportError(createStringError(errc::invalid_argument,
1006                                   "expecting exactly one host target but got " +
1007                                       Twine(HostTargetNum)));
1008   }
1009 
1010   if (Error)
1011     return 1;
1012 
1013   // Save the current executable directory as it will be useful to find other
1014   // tools.
1015   BundlerExecutable = sys::fs::getMainExecutable(argv[0], &BundlerExecutable);
1016 
1017   if (llvm::Error Err = Unbundle ? UnbundleFiles() : BundleFiles()) {
1018     reportError(std::move(Err));
1019     return 1;
1020   }
1021   return 0;
1022 }
1023