1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_INPUT_FILES_H
10 #define LLD_MACHO_INPUT_FILES_H
11 
12 #include "MachOStructs.h"
13 #include "Target.h"
14 
15 #include "lld/Common/DWARF.h"
16 #include "lld/Common/LLVM.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/CachedHashString.h"
19 #include "llvm/ADT/DenseSet.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/BinaryFormat/MachO.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/Object/Archive.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/TextAPI/TextAPIReader.h"
27 
28 #include <vector>
29 
30 namespace llvm {
31 namespace lto {
32 class InputFile;
33 } // namespace lto
34 namespace MachO {
35 class InterfaceFile;
36 } // namespace MachO
37 class TarWriter;
38 } // namespace llvm
39 
40 namespace lld {
41 namespace macho {
42 
43 struct PlatformInfo;
44 class ConcatInputSection;
45 class Symbol;
46 class Defined;
47 class AliasSymbol;
48 struct Reloc;
49 enum class RefState : uint8_t;
50 
51 // If --reproduce option is given, all input files are written
52 // to this tar archive.
53 extern std::unique_ptr<llvm::TarWriter> tar;
54 
55 // If .subsections_via_symbols is set, each InputSection will be split along
56 // symbol boundaries. The field offset represents the offset of the subsection
57 // from the start of the original pre-split InputSection.
58 struct Subsection {
59   uint64_t offset = 0;
60   InputSection *isec = nullptr;
61 };
62 
63 using Subsections = std::vector<Subsection>;
64 class InputFile;
65 
66 class Section {
67 public:
68   InputFile *file;
69   StringRef segname;
70   StringRef name;
71   uint32_t flags;
72   uint64_t addr;
73   Subsections subsections;
74 
Section(InputFile * file,StringRef segname,StringRef name,uint32_t flags,uint64_t addr)75   Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
76           uint64_t addr)
77       : file(file), segname(segname), name(name), flags(flags), addr(addr) {}
78   // Ensure pointers to Sections are never invalidated.
79   Section(const Section &) = delete;
80   Section &operator=(const Section &) = delete;
81   Section(Section &&) = delete;
82   Section &operator=(Section &&) = delete;
83 
84 private:
85   // Whether we have already split this section into individual subsections.
86   // For sections that cannot be split (e.g. literal sections), this is always
87   // false.
88   bool doneSplitting = false;
89   friend class ObjFile;
90 };
91 
// Represents a call graph profile edge: a directed caller -> callee pair and
// how often that call was observed.
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  // Stores the three values unchanged; no validation is performed.
  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
104 
105 class InputFile {
106 public:
107   enum Kind {
108     ObjKind,
109     OpaqueKind,
110     DylibKind,
111     ArchiveKind,
112     BitcodeKind,
113   };
114 
115   virtual ~InputFile() = default;
kind()116   Kind kind() const { return fileKind; }
getName()117   StringRef getName() const { return name; }
resetIdCount()118   static void resetIdCount() { idCount = 0; }
119 
120   MemoryBufferRef mb;
121 
122   std::vector<Symbol *> symbols;
123   std::vector<Section *> sections;
124   ArrayRef<uint8_t> objCImageInfo;
125 
126   // If not empty, this stores the name of the archive containing this file.
127   // We use this string for creating error messages.
128   std::string archiveName;
129 
130   // Provides an easy way to sort InputFiles deterministically.
131   const int id;
132 
133   // True if this is a lazy ObjFile or BitcodeFile.
134   bool lazy = false;
135 
136 protected:
137   InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
mb(mb)138       : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
139         name(mb.getBufferIdentifier()) {}
140 
141   InputFile(Kind, const llvm::MachO::InterfaceFile &);
142 
143   // If true, this input's arch is compatible with target.
144   bool compatArch = true;
145 
146 private:
147   const Kind fileKind;
148   const StringRef name;
149 
150   static int idCount;
151 };
152 
153 struct FDE {
154   uint32_t funcLength;
155   Symbol *personality;
156   InputSection *lsda;
157 };
158 
159 // .o file
160 class ObjFile final : public InputFile {
161 public:
162   ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
163           bool lazy = false, bool forceHidden = false, bool compatArch = true,
164           bool builtFromBitcode = false);
165   ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
166   ArrayRef<uint8_t> getOptimizationHints() const;
167   template <class LP> void parse();
168   template <class LP>
169   void parseLinkerOptions(llvm::SmallVectorImpl<StringRef> &LinkerOptions);
170 
classof(const InputFile * f)171   static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
172 
173   std::string sourceFile() const;
174   // Parses line table information for diagnostics. compileUnit should be used
175   // for other purposes.
176   lld::DWARFCache *getDwarf();
177 
178   llvm::DWARFUnit *compileUnit = nullptr;
179   std::unique_ptr<lld::DWARFCache> dwarfCache;
180   Section *addrSigSection = nullptr;
181   const uint32_t modTime;
182   bool forceHidden;
183   bool builtFromBitcode;
184   std::vector<ConcatInputSection *> debugSections;
185   std::vector<CallGraphEntry> callGraph;
186   llvm::DenseMap<ConcatInputSection *, FDE> fdes;
187   std::vector<AliasSymbol *> aliases;
188 
189 private:
190   llvm::once_flag initDwarf;
191   template <class LP> void parseLazy();
192   template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
193   template <class LP>
194   void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
195                     ArrayRef<typename LP::nlist> nList, const char *strtab,
196                     bool subsectionsViaSymbols);
197   template <class NList>
198   Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
199   template <class SectionHeader>
200   void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
201                         const SectionHeader &, Section &);
202   void parseDebugInfo();
203   void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
204   void registerCompactUnwind(Section &compactUnwindSection);
205   void registerEhFrames(Section &ehFrameSection);
206 };
207 
208 // command-line -sectcreate file
209 class OpaqueFile final : public InputFile {
210 public:
211   OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
classof(const InputFile * f)212   static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
213 };
214 
215 // .dylib or .tbd file
216 class DylibFile final : public InputFile {
217 public:
218   // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
219   // symbols in those sub-libraries will be available under the umbrella
220   // library's namespace. Those sub-libraries can also have their own
221   // re-exports. When loading a re-exported dylib, `umbrella` should be set to
222   // the root dylib to ensure symbols in the child library are correctly bound
223   // to the root. On the other hand, if a dylib is being directly loaded
224   // (through an -lfoo flag), then `umbrella` should be a nullptr.
225   explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
226                      bool isBundleLoader, bool explicitlyLinked);
227   explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
228                      DylibFile *umbrella, bool isBundleLoader,
229                      bool explicitlyLinked);
230   explicit DylibFile(DylibFile *umbrella);
231 
232   void parseLoadCommands(MemoryBufferRef mb);
233   void parseReexports(const llvm::MachO::InterfaceFile &interface);
isReferenced()234   bool isReferenced() const { return numReferencedSymbols > 0; }
235   bool isExplicitlyLinked() const;
setExplicitlyLinked()236   void setExplicitlyLinked() { explicitlyLinked = true; }
237 
classof(const InputFile * f)238   static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
239 
240   StringRef installName;
241   DylibFile *exportingFile = nullptr;
242   DylibFile *umbrella;
243   SmallVector<StringRef, 2> rpaths;
244   uint32_t compatibilityVersion = 0;
245   uint32_t currentVersion = 0;
246   int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
247   unsigned numReferencedSymbols = 0;
248   RefState refState;
249   bool reexport = false;
250   bool forceNeeded = false;
251   bool forceWeakImport = false;
252   bool deadStrippable = false;
253 
254 private:
255   bool explicitlyLinked = false; // Access via isExplicitlyLinked().
256 
257 public:
258   // An executable can be used as a bundle loader that will load the output
259   // file being linked, and that contains symbols referenced, but not
260   // implemented in the bundle. When used like this, it is very similar
261   // to a dylib, so we've used the same class to represent it.
262   bool isBundleLoader;
263 
264   // Synthetic Dylib objects created by $ld$previous symbols in this dylib.
265   // Usually empty. These synthetic dylibs won't have synthetic dylibs
266   // themselves.
267   SmallVector<DylibFile *, 2> extraDylibs;
268 
269 private:
270   DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion,
271                                uint32_t compatVersion);
272 
273   bool handleLDSymbol(StringRef originalName);
274   void handleLDPreviousSymbol(StringRef name, StringRef originalName);
275   void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
276   void handleLDHideSymbol(StringRef name, StringRef originalName);
277   void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
278   void parseExportedSymbols(uint32_t offset, uint32_t size);
279   void loadReexport(StringRef path, DylibFile *umbrella,
280                     const llvm::MachO::InterfaceFile *currentTopLevelTapi);
281 
282   llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
283 };
284 
285 // .a file
286 class ArchiveFile final : public InputFile {
287 public:
288   explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file,
289                        bool forceHidden);
290   void addLazySymbols();
291   void fetch(const llvm::object::Archive::Symbol &);
292   // LLD normally doesn't use Error for error-handling, but the underlying
293   // Archive library does, so this is the cleanest way to wrap it.
294   Error fetch(const llvm::object::Archive::Child &, StringRef reason);
getArchive()295   const llvm::object::Archive &getArchive() const { return *file; };
classof(const InputFile * f)296   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
297 
298 private:
299   std::unique_ptr<llvm::object::Archive> file;
300   // Keep track of children fetched from the archive by tracking
301   // which address offsets have been fetched already.
302   llvm::DenseSet<uint64_t> seen;
303   // Load all symbols with hidden visibility (-load_hidden).
304   bool forceHidden;
305 };
306 
307 class BitcodeFile final : public InputFile {
308 public:
309   explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
310                        uint64_t offsetInArchive, bool lazy = false,
311                        bool forceHidden = false, bool compatArch = true);
classof(const InputFile * f)312   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
313   void parse();
314 
315   std::unique_ptr<llvm::lto::InputFile> obj;
316   bool forceHidden;
317 
318 private:
319   void parseLazy();
320 };
321 
322 extern llvm::SetVector<InputFile *> inputFiles;
323 extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
324 extern llvm::SmallVector<StringRef> unprocessedLCLinkerOptions;
325 
326 std::optional<MemoryBufferRef> readFile(StringRef path);
327 
328 void extract(InputFile &file, StringRef reason);
329 
330 namespace detail {
331 
332 template <class CommandType, class... Types>
333 std::vector<const CommandType *>
findCommands(const void * anyHdr,size_t maxCommands,Types...types)334 findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
335   std::vector<const CommandType *> cmds;
336   std::initializer_list<uint32_t> typesList{types...};
337   const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
338   const uint8_t *p =
339       reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
340   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
341     auto *cmd = reinterpret_cast<const CommandType *>(p);
342     if (llvm::is_contained(typesList, cmd->cmd)) {
343       cmds.push_back(cmd);
344       if (cmds.size() == maxCommands)
345         return cmds;
346     }
347     p += cmd->cmdsize;
348   }
349   return cmds;
350 }
351 
352 } // namespace detail
353 
354 // anyHdr should be a pointer to either mach_header or mach_header_64
355 template <class CommandType = llvm::MachO::load_command, class... Types>
findCommand(const void * anyHdr,Types...types)356 const CommandType *findCommand(const void *anyHdr, Types... types) {
357   std::vector<const CommandType *> cmds =
358       detail::findCommands<CommandType>(anyHdr, 1, types...);
359   return cmds.size() ? cmds[0] : nullptr;
360 }
361 
362 template <class CommandType = llvm::MachO::load_command, class... Types>
findCommands(const void * anyHdr,Types...types)363 std::vector<const CommandType *> findCommands(const void *anyHdr,
364                                               Types... types) {
365   return detail::findCommands<CommandType>(anyHdr, 0, types...);
366 }
367 
368 std::string replaceThinLTOSuffix(StringRef path);
369 } // namespace macho
370 
371 std::string toString(const macho::InputFile *file);
372 std::string toString(const macho::Section &);
373 } // namespace lld
374 
375 #endif
376