// xref: /openbsd/gnu/llvm/lld/MachO/InputFiles.h (revision dfe94b16)
//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8bb684c34Spatrick 
9bb684c34Spatrick #ifndef LLD_MACHO_INPUT_FILES_H
10bb684c34Spatrick #define LLD_MACHO_INPUT_FILES_H
11bb684c34Spatrick 
12bb684c34Spatrick #include "MachOStructs.h"
131cf9926bSpatrick #include "Target.h"
14bb684c34Spatrick 
15*dfe94b16Srobert #include "lld/Common/DWARF.h"
16bb684c34Spatrick #include "lld/Common/LLVM.h"
171cf9926bSpatrick #include "lld/Common/Memory.h"
18*dfe94b16Srobert #include "llvm/ADT/CachedHashString.h"
19bb684c34Spatrick #include "llvm/ADT/DenseSet.h"
201cf9926bSpatrick #include "llvm/ADT/SetVector.h"
21bb684c34Spatrick #include "llvm/BinaryFormat/MachO.h"
221cf9926bSpatrick #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23bb684c34Spatrick #include "llvm/Object/Archive.h"
24bb684c34Spatrick #include "llvm/Support/MemoryBuffer.h"
25*dfe94b16Srobert #include "llvm/Support/Threading.h"
261cf9926bSpatrick #include "llvm/TextAPI/TextAPIReader.h"
27bb684c34Spatrick 
28bb684c34Spatrick #include <vector>
29bb684c34Spatrick 
301cf9926bSpatrick namespace llvm {
311cf9926bSpatrick namespace lto {
321cf9926bSpatrick class InputFile;
331cf9926bSpatrick } // namespace lto
341cf9926bSpatrick namespace MachO {
351cf9926bSpatrick class InterfaceFile;
361cf9926bSpatrick } // namespace MachO
371cf9926bSpatrick class TarWriter;
381cf9926bSpatrick } // namespace llvm
391cf9926bSpatrick 
40bb684c34Spatrick namespace lld {
41bb684c34Spatrick namespace macho {
42bb684c34Spatrick 
431cf9926bSpatrick struct PlatformInfo;
441cf9926bSpatrick class ConcatInputSection;
45bb684c34Spatrick class Symbol;
46*dfe94b16Srobert class Defined;
47*dfe94b16Srobert class AliasSymbol;
48bb684c34Spatrick struct Reloc;
491cf9926bSpatrick enum class RefState : uint8_t;
501cf9926bSpatrick 
511cf9926bSpatrick // If --reproduce option is given, all input files are written
521cf9926bSpatrick // to this tar archive.
531cf9926bSpatrick extern std::unique_ptr<llvm::TarWriter> tar;
54bb684c34Spatrick 
55bb684c34Spatrick // If .subsections_via_symbols is set, each InputSection will be split along
561cf9926bSpatrick // symbol boundaries. The field offset represents the offset of the subsection
571cf9926bSpatrick // from the start of the original pre-split InputSection.
58*dfe94b16Srobert struct Subsection {
59*dfe94b16Srobert   uint64_t offset = 0;
60*dfe94b16Srobert   InputSection *isec = nullptr;
611cf9926bSpatrick };
62*dfe94b16Srobert 
63*dfe94b16Srobert using Subsections = std::vector<Subsection>;
64*dfe94b16Srobert class InputFile;
65*dfe94b16Srobert 
66*dfe94b16Srobert class Section {
67*dfe94b16Srobert public:
68*dfe94b16Srobert   InputFile *file;
69*dfe94b16Srobert   StringRef segname;
70*dfe94b16Srobert   StringRef name;
71*dfe94b16Srobert   uint32_t flags;
72*dfe94b16Srobert   uint64_t addr;
73*dfe94b16Srobert   Subsections subsections;
74*dfe94b16Srobert 
Section(InputFile * file,StringRef segname,StringRef name,uint32_t flags,uint64_t addr)75*dfe94b16Srobert   Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
76*dfe94b16Srobert           uint64_t addr)
77*dfe94b16Srobert       : file(file), segname(segname), name(name), flags(flags), addr(addr) {}
78*dfe94b16Srobert   // Ensure pointers to Sections are never invalidated.
79*dfe94b16Srobert   Section(const Section &) = delete;
80*dfe94b16Srobert   Section &operator=(const Section &) = delete;
81*dfe94b16Srobert   Section(Section &&) = delete;
82*dfe94b16Srobert   Section &operator=(Section &&) = delete;
83*dfe94b16Srobert 
84*dfe94b16Srobert private:
85*dfe94b16Srobert   // Whether we have already split this section into individual subsections.
86*dfe94b16Srobert   // For sections that cannot be split (e.g. literal sections), this is always
87*dfe94b16Srobert   // false.
88*dfe94b16Srobert   bool doneSplitting = false;
89*dfe94b16Srobert   friend class ObjFile;
90*dfe94b16Srobert };
91*dfe94b16Srobert 
// Represents a call graph profile edge.
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  // Builds an edge fromIndex -> toIndex carrying the given call count.
  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
104bb684c34Spatrick 
105bb684c34Spatrick class InputFile {
106bb684c34Spatrick public:
107bb684c34Spatrick   enum Kind {
108bb684c34Spatrick     ObjKind,
1091cf9926bSpatrick     OpaqueKind,
110bb684c34Spatrick     DylibKind,
111bb684c34Spatrick     ArchiveKind,
1121cf9926bSpatrick     BitcodeKind,
113bb684c34Spatrick   };
114bb684c34Spatrick 
115bb684c34Spatrick   virtual ~InputFile() = default;
kind()116bb684c34Spatrick   Kind kind() const { return fileKind; }
getName()1171cf9926bSpatrick   StringRef getName() const { return name; }
resetIdCount()118*dfe94b16Srobert   static void resetIdCount() { idCount = 0; }
119bb684c34Spatrick 
120bb684c34Spatrick   MemoryBufferRef mb;
1211cf9926bSpatrick 
122bb684c34Spatrick   std::vector<Symbol *> symbols;
123*dfe94b16Srobert   std::vector<Section *> sections;
124*dfe94b16Srobert   ArrayRef<uint8_t> objCImageInfo;
1251cf9926bSpatrick 
1261cf9926bSpatrick   // If not empty, this stores the name of the archive containing this file.
1271cf9926bSpatrick   // We use this string for creating error messages.
1281cf9926bSpatrick   std::string archiveName;
129bb684c34Spatrick 
130*dfe94b16Srobert   // Provides an easy way to sort InputFiles deterministically.
131*dfe94b16Srobert   const int id;
132*dfe94b16Srobert 
133*dfe94b16Srobert   // True if this is a lazy ObjFile or BitcodeFile.
134*dfe94b16Srobert   bool lazy = false;
135*dfe94b16Srobert 
136bb684c34Spatrick protected:
137*dfe94b16Srobert   InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
mb(mb)138*dfe94b16Srobert       : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
139*dfe94b16Srobert         name(mb.getBufferIdentifier()) {}
140bb684c34Spatrick 
1411cf9926bSpatrick   InputFile(Kind, const llvm::MachO::InterfaceFile &);
142bb684c34Spatrick 
143bb684c34Spatrick private:
144bb684c34Spatrick   const Kind fileKind;
1451cf9926bSpatrick   const StringRef name;
1461cf9926bSpatrick 
1471cf9926bSpatrick   static int idCount;
148bb684c34Spatrick };
149bb684c34Spatrick 
150*dfe94b16Srobert struct FDE {
151*dfe94b16Srobert   uint32_t funcLength;
152*dfe94b16Srobert   Symbol *personality;
153*dfe94b16Srobert   InputSection *lsda;
154*dfe94b16Srobert };
155*dfe94b16Srobert 
156bb684c34Spatrick // .o file
1571cf9926bSpatrick class ObjFile final : public InputFile {
158bb684c34Spatrick public:
159*dfe94b16Srobert   ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
160*dfe94b16Srobert           bool lazy = false, bool forceHidden = false);
161*dfe94b16Srobert   ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
162*dfe94b16Srobert   ArrayRef<uint8_t> getOptimizationHints() const;
163*dfe94b16Srobert   template <class LP> void parse();
164*dfe94b16Srobert 
classof(const InputFile * f)165bb684c34Spatrick   static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
1661cf9926bSpatrick 
167*dfe94b16Srobert   std::string sourceFile() const;
168*dfe94b16Srobert   // Parses line table information for diagnostics. compileUnit should be used
169*dfe94b16Srobert   // for other purposes.
170*dfe94b16Srobert   lld::DWARFCache *getDwarf();
171*dfe94b16Srobert 
1721cf9926bSpatrick   llvm::DWARFUnit *compileUnit = nullptr;
173*dfe94b16Srobert   std::unique_ptr<lld::DWARFCache> dwarfCache;
174*dfe94b16Srobert   Section *addrSigSection = nullptr;
1751cf9926bSpatrick   const uint32_t modTime;
176*dfe94b16Srobert   bool forceHidden;
1771cf9926bSpatrick   std::vector<ConcatInputSection *> debugSections;
178*dfe94b16Srobert   std::vector<CallGraphEntry> callGraph;
179*dfe94b16Srobert   llvm::DenseMap<ConcatInputSection *, FDE> fdes;
180*dfe94b16Srobert   std::vector<AliasSymbol *> aliases;
1811cf9926bSpatrick 
1821cf9926bSpatrick private:
183*dfe94b16Srobert   llvm::once_flag initDwarf;
184*dfe94b16Srobert   template <class LP> void parseLazy();
185*dfe94b16Srobert   template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
1861cf9926bSpatrick   template <class LP>
1871cf9926bSpatrick   void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
1881cf9926bSpatrick                     ArrayRef<typename LP::nlist> nList, const char *strtab,
1891cf9926bSpatrick                     bool subsectionsViaSymbols);
1901cf9926bSpatrick   template <class NList>
191*dfe94b16Srobert   Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
192*dfe94b16Srobert   template <class SectionHeader>
193*dfe94b16Srobert   void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
194*dfe94b16Srobert                         const SectionHeader &, Section &);
1951cf9926bSpatrick   void parseDebugInfo();
196*dfe94b16Srobert   void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
197*dfe94b16Srobert   void registerCompactUnwind(Section &compactUnwindSection);
198*dfe94b16Srobert   void registerEhFrames(Section &ehFrameSection);
199bb684c34Spatrick };
200bb684c34Spatrick 
2011cf9926bSpatrick // command-line -sectcreate file
2021cf9926bSpatrick class OpaqueFile final : public InputFile {
203bb684c34Spatrick public:
2041cf9926bSpatrick   OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
classof(const InputFile * f)2051cf9926bSpatrick   static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
2061cf9926bSpatrick };
207bb684c34Spatrick 
2081cf9926bSpatrick // .dylib or .tbd file
2091cf9926bSpatrick class DylibFile final : public InputFile {
2101cf9926bSpatrick public:
211bb684c34Spatrick   // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
212bb684c34Spatrick   // symbols in those sub-libraries will be available under the umbrella
213bb684c34Spatrick   // library's namespace. Those sub-libraries can also have their own
214bb684c34Spatrick   // re-exports. When loading a re-exported dylib, `umbrella` should be set to
215bb684c34Spatrick   // the root dylib to ensure symbols in the child library are correctly bound
216bb684c34Spatrick   // to the root. On the other hand, if a dylib is being directly loaded
217bb684c34Spatrick   // (through an -lfoo flag), then `umbrella` should be a nullptr.
2181cf9926bSpatrick   explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
219*dfe94b16Srobert                      bool isBundleLoader, bool explicitlyLinked);
2201cf9926bSpatrick   explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
221*dfe94b16Srobert                      DylibFile *umbrella, bool isBundleLoader,
222*dfe94b16Srobert                      bool explicitlyLinked);
223*dfe94b16Srobert   explicit DylibFile(DylibFile *umbrella);
2241cf9926bSpatrick 
2251cf9926bSpatrick   void parseLoadCommands(MemoryBufferRef mb);
2261cf9926bSpatrick   void parseReexports(const llvm::MachO::InterfaceFile &interface);
isReferenced()227*dfe94b16Srobert   bool isReferenced() const { return numReferencedSymbols > 0; }
228*dfe94b16Srobert   bool isExplicitlyLinked() const;
setExplicitlyLinked()229*dfe94b16Srobert   void setExplicitlyLinked() { explicitlyLinked = true; }
230bb684c34Spatrick 
classof(const InputFile * f)231bb684c34Spatrick   static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
232bb684c34Spatrick 
2331cf9926bSpatrick   StringRef installName;
2341cf9926bSpatrick   DylibFile *exportingFile = nullptr;
2351cf9926bSpatrick   DylibFile *umbrella;
2361cf9926bSpatrick   SmallVector<StringRef, 2> rpaths;
2371cf9926bSpatrick   uint32_t compatibilityVersion = 0;
2381cf9926bSpatrick   uint32_t currentVersion = 0;
2391cf9926bSpatrick   int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
240*dfe94b16Srobert   unsigned numReferencedSymbols = 0;
2411cf9926bSpatrick   RefState refState;
242bb684c34Spatrick   bool reexport = false;
2431cf9926bSpatrick   bool forceNeeded = false;
2441cf9926bSpatrick   bool forceWeakImport = false;
2451cf9926bSpatrick   bool deadStrippable = false;
2461cf9926bSpatrick 
247*dfe94b16Srobert private:
248*dfe94b16Srobert   bool explicitlyLinked = false; // Access via isExplicitlyLinked().
2491cf9926bSpatrick 
250*dfe94b16Srobert public:
2511cf9926bSpatrick   // An executable can be used as a bundle loader that will load the output
2521cf9926bSpatrick   // file being linked, and that contains symbols referenced, but not
2531cf9926bSpatrick   // implemented in the bundle. When used like this, it is very similar
254*dfe94b16Srobert   // to a dylib, so we've used the same class to represent it.
2551cf9926bSpatrick   bool isBundleLoader;
2561cf9926bSpatrick 
257*dfe94b16Srobert   // Synthetic Dylib objects created by $ld$previous symbols in this dylib.
258*dfe94b16Srobert   // Usually empty. These synthetic dylibs won't have synthetic dylibs
259*dfe94b16Srobert   // themselves.
260*dfe94b16Srobert   SmallVector<DylibFile *, 2> extraDylibs;
261*dfe94b16Srobert 
2621cf9926bSpatrick private:
263*dfe94b16Srobert   DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion,
264*dfe94b16Srobert                                uint32_t compatVersion);
265*dfe94b16Srobert 
2661cf9926bSpatrick   bool handleLDSymbol(StringRef originalName);
2671cf9926bSpatrick   void handleLDPreviousSymbol(StringRef name, StringRef originalName);
2681cf9926bSpatrick   void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
269*dfe94b16Srobert   void handleLDHideSymbol(StringRef name, StringRef originalName);
2701cf9926bSpatrick   void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
271*dfe94b16Srobert   void parseExportedSymbols(uint32_t offset, uint32_t size);
272*dfe94b16Srobert   void loadReexport(StringRef path, DylibFile *umbrella,
273*dfe94b16Srobert                     const llvm::MachO::InterfaceFile *currentTopLevelTapi);
274*dfe94b16Srobert 
275*dfe94b16Srobert   llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
276bb684c34Spatrick };
277bb684c34Spatrick 
278bb684c34Spatrick // .a file
2791cf9926bSpatrick class ArchiveFile final : public InputFile {
280bb684c34Spatrick public:
281*dfe94b16Srobert   explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file,
282*dfe94b16Srobert                        bool forceHidden);
283*dfe94b16Srobert   void addLazySymbols();
284*dfe94b16Srobert   void fetch(const llvm::object::Archive::Symbol &);
285*dfe94b16Srobert   // LLD normally doesn't use Error for error-handling, but the underlying
286*dfe94b16Srobert   // Archive library does, so this is the cleanest way to wrap it.
287*dfe94b16Srobert   Error fetch(const llvm::object::Archive::Child &, StringRef reason);
getArchive()288*dfe94b16Srobert   const llvm::object::Archive &getArchive() const { return *file; };
classof(const InputFile * f)289bb684c34Spatrick   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
290bb684c34Spatrick 
291bb684c34Spatrick private:
292bb684c34Spatrick   std::unique_ptr<llvm::object::Archive> file;
293bb684c34Spatrick   // Keep track of children fetched from the archive by tracking
294bb684c34Spatrick   // which address offsets have been fetched already.
295bb684c34Spatrick   llvm::DenseSet<uint64_t> seen;
296*dfe94b16Srobert   // Load all symbols with hidden visibility (-load_hidden).
297*dfe94b16Srobert   bool forceHidden;
298bb684c34Spatrick };
299bb684c34Spatrick 
3001cf9926bSpatrick class BitcodeFile final : public InputFile {
3011cf9926bSpatrick public:
3021cf9926bSpatrick   explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
303*dfe94b16Srobert                        uint64_t offsetInArchive, bool lazy = false,
304*dfe94b16Srobert                        bool forceHidden = false);
classof(const InputFile * f)3051cf9926bSpatrick   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
306*dfe94b16Srobert   void parse();
3071cf9926bSpatrick 
3081cf9926bSpatrick   std::unique_ptr<llvm::lto::InputFile> obj;
309*dfe94b16Srobert   bool forceHidden;
310*dfe94b16Srobert 
311*dfe94b16Srobert private:
312*dfe94b16Srobert   void parseLazy();
3131cf9926bSpatrick };
3141cf9926bSpatrick 
3151cf9926bSpatrick extern llvm::SetVector<InputFile *> inputFiles;
316*dfe94b16Srobert extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
317bb684c34Spatrick 
318*dfe94b16Srobert std::optional<MemoryBufferRef> readFile(StringRef path);
319*dfe94b16Srobert 
320*dfe94b16Srobert void extract(InputFile &file, StringRef reason);
321bb684c34Spatrick 
3221cf9926bSpatrick namespace detail {
3231cf9926bSpatrick 
3241cf9926bSpatrick template <class CommandType, class... Types>
3251cf9926bSpatrick std::vector<const CommandType *>
findCommands(const void * anyHdr,size_t maxCommands,Types...types)3261cf9926bSpatrick findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
3271cf9926bSpatrick   std::vector<const CommandType *> cmds;
3281cf9926bSpatrick   std::initializer_list<uint32_t> typesList{types...};
3291cf9926bSpatrick   const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
3301cf9926bSpatrick   const uint8_t *p =
3311cf9926bSpatrick       reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
3321cf9926bSpatrick   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
3331cf9926bSpatrick     auto *cmd = reinterpret_cast<const CommandType *>(p);
3341cf9926bSpatrick     if (llvm::is_contained(typesList, cmd->cmd)) {
3351cf9926bSpatrick       cmds.push_back(cmd);
3361cf9926bSpatrick       if (cmds.size() == maxCommands)
3371cf9926bSpatrick         return cmds;
3381cf9926bSpatrick     }
3391cf9926bSpatrick     p += cmd->cmdsize;
3401cf9926bSpatrick   }
3411cf9926bSpatrick   return cmds;
3421cf9926bSpatrick }
3431cf9926bSpatrick 
3441cf9926bSpatrick } // namespace detail
3451cf9926bSpatrick 
3461cf9926bSpatrick // anyHdr should be a pointer to either mach_header or mach_header_64
3471cf9926bSpatrick template <class CommandType = llvm::MachO::load_command, class... Types>
findCommand(const void * anyHdr,Types...types)3481cf9926bSpatrick const CommandType *findCommand(const void *anyHdr, Types... types) {
3491cf9926bSpatrick   std::vector<const CommandType *> cmds =
3501cf9926bSpatrick       detail::findCommands<CommandType>(anyHdr, 1, types...);
3511cf9926bSpatrick   return cmds.size() ? cmds[0] : nullptr;
3521cf9926bSpatrick }
3531cf9926bSpatrick 
3541cf9926bSpatrick template <class CommandType = llvm::MachO::load_command, class... Types>
findCommands(const void * anyHdr,Types...types)3551cf9926bSpatrick std::vector<const CommandType *> findCommands(const void *anyHdr,
3561cf9926bSpatrick                                               Types... types) {
3571cf9926bSpatrick   return detail::findCommands<CommandType>(anyHdr, 0, types...);
3581cf9926bSpatrick }
3591cf9926bSpatrick 
360*dfe94b16Srobert std::string replaceThinLTOSuffix(StringRef path);
361bb684c34Spatrick } // namespace macho
362bb684c34Spatrick 
363bb684c34Spatrick std::string toString(const macho::InputFile *file);
364*dfe94b16Srobert std::string toString(const macho::Section &);
365bb684c34Spatrick } // namespace lld
366bb684c34Spatrick 
367bb684c34Spatrick #endif
368