1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_INPUT_FILES_H
10 #define LLD_MACHO_INPUT_FILES_H
11 
12 #include "MachOStructs.h"
13 #include "Target.h"
14 
15 #include "lld/Common/LLVM.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/BinaryFormat/MachO.h"
20 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
21 #include "llvm/Object/Archive.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 #include "llvm/TextAPI/TextAPIReader.h"
24 
25 #include <vector>
26 
27 namespace llvm {
28 namespace lto {
29 class InputFile;
30 } // namespace lto
31 namespace MachO {
32 class InterfaceFile;
33 } // namespace MachO
34 class TarWriter;
35 } // namespace llvm
36 
37 namespace lld {
38 namespace macho {
39 
// Forward declarations. The definitions of these types are not part of this
// header.
40 struct PlatformInfo;
41 class ConcatInputSection;
42 class Symbol;
43 struct Reloc;
// Opaque enum declaration: because the underlying type is fixed, RefState can
// be used as a complete type (DylibFile stores one by value) without its
// enumerator list being visible here.
44 enum class RefState : uint8_t;
45 
46 // If --reproduce option is given, all input files are written
47 // to this tar archive.
48 extern std::unique_ptr<llvm::TarWriter> tar;
49 
50 // If .subsections_via_symbols is set, each InputSection will be split along
51 // symbol boundaries. The field `offset` represents the offset of the
52 // subsection from the start of the original pre-split InputSection.
53 struct SubsectionEntry {
  // Offset of the subsection from the start of the original, pre-split
  // InputSection.
54   uint64_t offset;
  // NOTE(review): presumably the InputSection holding the subsection that
  // starts at `offset` -- confirm against the code that fills this in.
55   InputSection *isec;
56 };
// A list of SubsectionEntry values. NOTE(review): presumably one map per
// original section, ordered by offset -- confirm against the parsing code.
57 using SubsectionMap = std::vector<SubsectionEntry>;
58 
// Common base class for all linker input files. The Kind passed to the
// constructor records which concrete subclass an object is; each subclass's
// classof() compares kind() against its own enumerator.
59 class InputFile {
60 public:
  // Identifies the concrete subclass of an InputFile.
61   enum Kind {
62     ObjKind,
63     OpaqueKind,
64     DylibKind,
65     ArchiveKind,
66     BitcodeKind,
67   };
68
69   virtual ~InputFile() = default;
  // Returns the Kind this file was constructed with.
70   Kind kind() const { return fileKind; }
  // Returns the file's name. When the file was constructed from a
  // MemoryBufferRef, this is the buffer's identifier.
71   StringRef getName() const { return name; }
72
  // The buffer this file was constructed from, when the MemoryBufferRef
  // constructor was used.
73   MemoryBufferRef mb;
74
75   std::vector<Symbol *> symbols;
  // NOTE(review): presumably one SubsectionMap per section of this file --
  // confirm against the parsing code.
76   std::vector<SubsectionMap> subsections;
77   // Provides an easy way to sort InputFiles deterministically.
  // The MemoryBufferRef constructor assigns it from the static idCount, which
  // it post-increments.
78   const int id;
79
80   // If not empty, this stores the name of the archive containing this file.
81   // We use this string for creating error messages.
82   std::string archiveName;
83
84 protected:
  // Constructors are protected, so an InputFile is only ever created through
  // one of its subclasses.
85   InputFile(Kind kind, MemoryBufferRef mb)
86       : mb(mb), id(idCount++), fileKind(kind), name(mb.getBufferIdentifier()) {}
87
  // Constructs a file described by an llvm::MachO::InterfaceFile instead of a
  // memory buffer. Only declared here; the definition is not in this header.
88   InputFile(Kind, const llvm::MachO::InterfaceFile &);
89
90 private:
91   const Kind fileKind;
  // StringRef does not own its characters; for buffer-backed files it refers
  // to the buffer identifier.
92   const StringRef name;
93
  // Source of the `id` values handed out by the MemoryBufferRef constructor.
94   static int idCount;
95 };
96 
97 // .o file
// The constructor and the parsing helpers are only declared in this header.
98 class ObjFile final : public InputFile {
99 public:
  // NOTE(review): `archiveName` presumably names the archive this object was
  // extracted from (compare InputFile::archiveName) -- confirm in the
  // definition.
100   ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName);
  // True iff `f` was constructed with ObjKind.
101   static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
102
  // Defaults to nullptr; nothing in this header assigns it.
103   llvm::DWARFUnit *compileUnit = nullptr;
  // Const, so it can only be set in the constructor's initializer list.
104   const uint32_t modTime;
105   std::vector<ConcatInputSection *> debugSections;
106   ArrayRef<llvm::MachO::data_in_code_entry> dataInCodeEntries;
107
108 private:
  // Parsing helpers. NOTE(review): the template parameters (LP, Section,
  // NList) presumably select between Mach-O layout variants -- confirm in the
  // definitions.
109   template <class LP> void parse();
110   template <class Section> void parseSections(ArrayRef<Section>);
111   template <class LP>
112   void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
113                     ArrayRef<typename LP::nlist> nList, const char *strtab,
114                     bool subsectionsViaSymbols);
115   template <class NList>
116   Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
117   template <class Section>
118   void parseRelocations(ArrayRef<Section> sectionHeaders, const Section &,
119                         SubsectionMap &);
120   void parseDebugInfo();
121   void parseDataInCode();
122 };
123 
124 // command-line -sectcreate file
// NOTE(review): `segName` and `sectName` presumably name the segment and
// section the file's contents are placed in -- confirm in the definition.
125 class OpaqueFile final : public InputFile {
126 public:
127   OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
  // True iff `f` was constructed with OpaqueKind.
128   static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
129 };
130 
131 // .dylib or .tbd file
// Also used to represent a bundle loader executable; see isBundleLoader.
132 class DylibFile final : public InputFile {
133 public:
134   // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
135   // symbols in those sub-libraries will be available under the umbrella
136   // library's namespace. Those sub-libraries can also have their own
137   // re-exports. When loading a re-exported dylib, `umbrella` should be set to
138   // the root dylib to ensure symbols in the child library are correctly bound
139   // to the root. On the other hand, if a dylib is being directly loaded
140   // (through an -lfoo flag), then `umbrella` should be a nullptr.
  // Note that this overload has no default for `umbrella`, whereas the
  // InterfaceFile overload defaults it to nullptr.
141   explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
142                      bool isBundleLoader = false);
143   explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
144                      DylibFile *umbrella = nullptr,
145                      bool isBundleLoader = false);
146
147   void parseLoadCommands(MemoryBufferRef mb);
148   void parseReexports(const llvm::MachO::InterfaceFile &interface);
149
  // True iff `f` was constructed with DylibKind.
150   static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
151
152   StringRef installName;
153   DylibFile *exportingFile = nullptr;
  // No in-class initializer. NOTE(review): confirm every constructor sets it.
154   DylibFile *umbrella;
155   SmallVector<StringRef, 2> rpaths;
156   uint32_t compatibilityVersion = 0;
157   uint32_t currentVersion = 0;
158   int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
  // No in-class initializer. NOTE(review): confirm every constructor sets it.
159   RefState refState;
160   bool reexport = false;
161   bool forceNeeded = false;
162   bool forceWeakImport = false;
163   bool deadStrippable = false;
164   bool explicitlyLinked = false;
165
166   unsigned numReferencedSymbols = 0;
167
  // True once numReferencedSymbols is non-zero.
168   bool isReferenced() const { return numReferencedSymbols > 0; }
169
170   // An executable can be used as a bundle loader that will load the output
171   // file being linked, and that contains symbols referenced, but not
172   // implemented in the bundle. When used like this, it is very similar
173   // to a Dylib, so we reuse the same class to represent it.
  // No in-class initializer. NOTE(review): confirm every constructor sets it.
174   bool isBundleLoader;
175
176 private:
  // NOTE(review): these appear to handle specially named linker-directive
  // symbols; the exact naming scheme is not visible in this header.
177   bool handleLDSymbol(StringRef originalName);
178   void handleLDPreviousSymbol(StringRef name, StringRef originalName);
179   void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
180   void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
181 };
182 
183 // .a file
184 class ArchiveFile final : public InputFile {
185 public:
  // Takes the archive by rvalue reference to a unique_ptr. NOTE(review):
  // presumably it is moved into the `file` member -- confirm in the
  // definition.
186   explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
  // True iff `f` was constructed with ArchiveKind.
187   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
  // NOTE(review): presumably loads the archive member that defines `sym`,
  // using `seen` to avoid loading the same member twice -- confirm in the
  // definition.
188   void fetch(const llvm::object::Archive::Symbol &sym);
189
190 private:
191   std::unique_ptr<llvm::object::Archive> file;
192   // Keep track of children fetched from the archive by tracking
193   // which address offsets have been fetched already.
194   llvm::DenseSet<uint64_t> seen;
195 };
196 
// An input file constructed with BitcodeKind. It owns an llvm::lto::InputFile
// through `obj`.
197 class BitcodeFile final : public InputFile {
198 public:
  // NOTE(review): `archiveName` and `offsetInArchive` presumably identify
  // where the bitcode came from when it was an archive member -- confirm in
  // the definition.
199   explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
200                        uint64_t offsetInArchive);
  // True iff `f` was constructed with BitcodeKind.
201   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
202
203   std::unique_ptr<llvm::lto::InputFile> obj;
204 };
205 
// Global collection of input files; only declared here, defined in another
// translation unit. A SetVector iterates in insertion order and holds each
// pointer at most once.
206 extern llvm::SetVector<InputFile *> inputFiles;
207
// Reads the file at `path`. NOTE(review): presumably the returned Optional is
// empty when the file cannot be read -- confirm in the definition.
208 llvm::Optional<MemoryBufferRef> readFile(StringRef path);
209 
210 namespace detail {
211 
212 template <class CommandType, class... Types>
213 std::vector<const CommandType *>
214 findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
215   std::vector<const CommandType *> cmds;
216   std::initializer_list<uint32_t> typesList{types...};
217   const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
218   const uint8_t *p =
219       reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
220   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
221     auto *cmd = reinterpret_cast<const CommandType *>(p);
222     if (llvm::is_contained(typesList, cmd->cmd)) {
223       cmds.push_back(cmd);
224       if (cmds.size() == maxCommands)
225         return cmds;
226     }
227     p += cmd->cmdsize;
228   }
229   return cmds;
230 }
231 
232 } // namespace detail
233 
234 // anyHdr should be a pointer to either mach_header or mach_header_64
235 template <class CommandType = llvm::MachO::load_command, class... Types>
236 const CommandType *findCommand(const void *anyHdr, Types... types) {
237   std::vector<const CommandType *> cmds =
238       detail::findCommands<CommandType>(anyHdr, 1, types...);
239   return cmds.size() ? cmds[0] : nullptr;
240 }
241 
242 template <class CommandType = llvm::MachO::load_command, class... Types>
243 std::vector<const CommandType *> findCommands(const void *anyHdr,
244                                               Types... types) {
245   return detail::findCommands<CommandType>(anyHdr, 0, types...);
246 }
247 
248 } // namespace macho
249 
// Returns a string form of `file`. Only declared here. NOTE(review):
// presumably meant for diagnostics -- confirm in the definition.
250 std::string toString(const macho::InputFile *file);
251 } // namespace lld
252 
253 #endif
254