15ffd83dbSDimitry Andric //===- InputFiles.cpp -----------------------------------------------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric // This file contains functions to parse Mach-O object files. In this comment,
105ffd83dbSDimitry Andric // we describe the Mach-O file structure and how we parse it.
115ffd83dbSDimitry Andric //
125ffd83dbSDimitry Andric // Mach-O is not very different from ELF or COFF. The notion of symbols,
135ffd83dbSDimitry Andric // sections and relocations exists in Mach-O as it does in ELF and COFF.
145ffd83dbSDimitry Andric //
155ffd83dbSDimitry Andric // Perhaps the notion that is new to those who know ELF/COFF is "subsections".
165ffd83dbSDimitry Andric // In ELF/COFF, sections are an atomic unit of data copied from input files to
175ffd83dbSDimitry Andric // output files. When we merge or garbage-collect sections, we treat each
185ffd83dbSDimitry Andric // section as an atomic unit. In Mach-O, that's not the case. Sections can
195ffd83dbSDimitry Andric // consist of multiple subsections, and subsections are a unit of merging and
205ffd83dbSDimitry Andric // garbage-collecting. Therefore, Mach-O's subsections are more similar to
215ffd83dbSDimitry Andric // ELF/COFF's sections than Mach-O's sections are.
225ffd83dbSDimitry Andric //
235ffd83dbSDimitry Andric // A section can have multiple symbols. A symbol that does not have the
245ffd83dbSDimitry Andric // N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
255ffd83dbSDimitry Andric // definition, a symbol is always present at the beginning of each subsection. A
265ffd83dbSDimitry Andric // symbol with N_ALT_ENTRY attribute does not start a new subsection and can
275ffd83dbSDimitry Andric // point to a middle of a subsection.
285ffd83dbSDimitry Andric //
295ffd83dbSDimitry Andric // The notion of subsections also affects how relocations are represented in
305ffd83dbSDimitry Andric // Mach-O. All references within a section need to be explicitly represented as
315ffd83dbSDimitry Andric // relocations if they refer to different subsections, because we obviously need
325ffd83dbSDimitry Andric // to fix up addresses if subsections are laid out in an output file differently
335ffd83dbSDimitry Andric // than they were in object files. To represent that, Mach-O relocations can
345ffd83dbSDimitry Andric // refer to an unnamed location via its address. Scattered relocations (those
355ffd83dbSDimitry Andric // with the R_SCATTERED bit set) always refer to unnamed locations.
365ffd83dbSDimitry Andric // Non-scattered relocations refer to an unnamed location if r_extern is not set
375ffd83dbSDimitry Andric // and r_symbolnum is zero.
385ffd83dbSDimitry Andric //
// Apart from the differences described above, your knowledge of ELF and COFF
// should carry over to Mach-O.
415ffd83dbSDimitry Andric //
425ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
435ffd83dbSDimitry Andric 
445ffd83dbSDimitry Andric #include "InputFiles.h"
455ffd83dbSDimitry Andric #include "Config.h"
46e8d8bef9SDimitry Andric #include "Driver.h"
47e8d8bef9SDimitry Andric #include "Dwarf.h"
4881ad6265SDimitry Andric #include "EhFrame.h"
495ffd83dbSDimitry Andric #include "ExportTrie.h"
505ffd83dbSDimitry Andric #include "InputSection.h"
515ffd83dbSDimitry Andric #include "MachOStructs.h"
52e8d8bef9SDimitry Andric #include "ObjC.h"
535ffd83dbSDimitry Andric #include "OutputSection.h"
54e8d8bef9SDimitry Andric #include "OutputSegment.h"
555ffd83dbSDimitry Andric #include "SymbolTable.h"
565ffd83dbSDimitry Andric #include "Symbols.h"
57fe6060f1SDimitry Andric #include "SyntheticSections.h"
585ffd83dbSDimitry Andric #include "Target.h"
595ffd83dbSDimitry Andric 
6004eeddc0SDimitry Andric #include "lld/Common/CommonLinkerContext.h"
61e8d8bef9SDimitry Andric #include "lld/Common/DWARF.h"
62e8d8bef9SDimitry Andric #include "lld/Common/Reproduce.h"
63e8d8bef9SDimitry Andric #include "llvm/ADT/iterator.h"
645ffd83dbSDimitry Andric #include "llvm/BinaryFormat/MachO.h"
65e8d8bef9SDimitry Andric #include "llvm/LTO/LTO.h"
6604eeddc0SDimitry Andric #include "llvm/Support/BinaryStreamReader.h"
675ffd83dbSDimitry Andric #include "llvm/Support/Endian.h"
6881ad6265SDimitry Andric #include "llvm/Support/LEB128.h"
695ffd83dbSDimitry Andric #include "llvm/Support/MemoryBuffer.h"
705ffd83dbSDimitry Andric #include "llvm/Support/Path.h"
71e8d8bef9SDimitry Andric #include "llvm/Support/TarWriter.h"
7204eeddc0SDimitry Andric #include "llvm/Support/TimeProfiler.h"
73fe6060f1SDimitry Andric #include "llvm/TextAPI/Architecture.h"
74fe6060f1SDimitry Andric #include "llvm/TextAPI/InterfaceFile.h"
755ffd83dbSDimitry Andric 
76bdd1243dSDimitry Andric #include <optional>
77349cc55cSDimitry Andric #include <type_traits>
78349cc55cSDimitry Andric 
795ffd83dbSDimitry Andric using namespace llvm;
805ffd83dbSDimitry Andric using namespace llvm::MachO;
815ffd83dbSDimitry Andric using namespace llvm::support::endian;
825ffd83dbSDimitry Andric using namespace llvm::sys;
835ffd83dbSDimitry Andric using namespace lld;
845ffd83dbSDimitry Andric using namespace lld::macho;
855ffd83dbSDimitry Andric 
86e8d8bef9SDimitry Andric // Returns "<internal>", "foo.a(bar.o)", or "baz.o".
toString(const InputFile * f)87e8d8bef9SDimitry Andric std::string lld::toString(const InputFile *f) {
88e8d8bef9SDimitry Andric   if (!f)
89e8d8bef9SDimitry Andric     return "<internal>";
90fe6060f1SDimitry Andric 
91fe6060f1SDimitry Andric   // Multiple dylibs can be defined in one .tbd file.
9206c3fb27SDimitry Andric   if (const auto *dylibFile = dyn_cast<DylibFile>(f))
9306c3fb27SDimitry Andric     if (f->getName().ends_with(".tbd"))
94fe6060f1SDimitry Andric       return (f->getName() + "(" + dylibFile->installName + ")").str();
95fe6060f1SDimitry Andric 
96e8d8bef9SDimitry Andric   if (f->archiveName.empty())
97e8d8bef9SDimitry Andric     return std::string(f->getName());
98fe6060f1SDimitry Andric   return (f->archiveName + "(" + path::filename(f->getName()) + ")").str();
99e8d8bef9SDimitry Andric }
100e8d8bef9SDimitry Andric 
toString(const Section & sec)10181ad6265SDimitry Andric std::string lld::toString(const Section &sec) {
10281ad6265SDimitry Andric   return (toString(sec.file) + ":(" + sec.name + ")").str();
10381ad6265SDimitry Andric }
10481ad6265SDimitry Andric 
// Definitions of the linker-wide state declared in the headers.
//
// NOTE(review): inputFiles is presumably the ordered set of files taking part
// in the link; it is not populated in this part of the file -- confirm.
SetVector<InputFile *> macho::inputFiles;
// When non-null, readFile() appends the contents of each file it loads to
// this tar writer.
std::unique_ptr<TarWriter> macho::tar;
// Source of InputFile ids: the InputFile constructor post-increments this to
// assign each file its `id`.
int InputFile::idCount = 0;
1085ffd83dbSDimitry Andric 
decodeVersion(uint32_t version)109fe6060f1SDimitry Andric static VersionTuple decodeVersion(uint32_t version) {
110fe6060f1SDimitry Andric   unsigned major = version >> 16;
111fe6060f1SDimitry Andric   unsigned minor = (version >> 8) & 0xffu;
112fe6060f1SDimitry Andric   unsigned subMinor = version & 0xffu;
113fe6060f1SDimitry Andric   return VersionTuple(major, minor, subMinor);
114fe6060f1SDimitry Andric }
115fe6060f1SDimitry Andric 
getPlatformInfos(const InputFile * input)116fe6060f1SDimitry Andric static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) {
117fe6060f1SDimitry Andric   if (!isa<ObjFile>(input) && !isa<DylibFile>(input))
118fe6060f1SDimitry Andric     return {};
119fe6060f1SDimitry Andric 
120fe6060f1SDimitry Andric   const char *hdr = input->mb.getBufferStart();
121fe6060f1SDimitry Andric 
12281ad6265SDimitry Andric   // "Zippered" object files can have multiple LC_BUILD_VERSION load commands.
123fe6060f1SDimitry Andric   std::vector<PlatformInfo> platformInfos;
124fe6060f1SDimitry Andric   for (auto *cmd : findCommands<build_version_command>(hdr, LC_BUILD_VERSION)) {
125fe6060f1SDimitry Andric     PlatformInfo info;
12604eeddc0SDimitry Andric     info.target.Platform = static_cast<PlatformType>(cmd->platform);
12706c3fb27SDimitry Andric     info.target.MinDeployment = decodeVersion(cmd->minos);
128fe6060f1SDimitry Andric     platformInfos.emplace_back(std::move(info));
129fe6060f1SDimitry Andric   }
130fe6060f1SDimitry Andric   for (auto *cmd : findCommands<version_min_command>(
131fe6060f1SDimitry Andric            hdr, LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS,
132fe6060f1SDimitry Andric            LC_VERSION_MIN_TVOS, LC_VERSION_MIN_WATCHOS)) {
133fe6060f1SDimitry Andric     PlatformInfo info;
134fe6060f1SDimitry Andric     switch (cmd->cmd) {
135fe6060f1SDimitry Andric     case LC_VERSION_MIN_MACOSX:
13604eeddc0SDimitry Andric       info.target.Platform = PLATFORM_MACOS;
137fe6060f1SDimitry Andric       break;
138fe6060f1SDimitry Andric     case LC_VERSION_MIN_IPHONEOS:
13904eeddc0SDimitry Andric       info.target.Platform = PLATFORM_IOS;
140fe6060f1SDimitry Andric       break;
141fe6060f1SDimitry Andric     case LC_VERSION_MIN_TVOS:
14204eeddc0SDimitry Andric       info.target.Platform = PLATFORM_TVOS;
143fe6060f1SDimitry Andric       break;
144fe6060f1SDimitry Andric     case LC_VERSION_MIN_WATCHOS:
14504eeddc0SDimitry Andric       info.target.Platform = PLATFORM_WATCHOS;
146fe6060f1SDimitry Andric       break;
147fe6060f1SDimitry Andric     }
14806c3fb27SDimitry Andric     info.target.MinDeployment = decodeVersion(cmd->version);
149fe6060f1SDimitry Andric     platformInfos.emplace_back(std::move(info));
150fe6060f1SDimitry Andric   }
151fe6060f1SDimitry Andric 
152fe6060f1SDimitry Andric   return platformInfos;
153fe6060f1SDimitry Andric }
154fe6060f1SDimitry Andric 
checkCompatibility(const InputFile * input)155fe6060f1SDimitry Andric static bool checkCompatibility(const InputFile *input) {
156fe6060f1SDimitry Andric   std::vector<PlatformInfo> platformInfos = getPlatformInfos(input);
157fe6060f1SDimitry Andric   if (platformInfos.empty())
158fe6060f1SDimitry Andric     return true;
159fe6060f1SDimitry Andric 
160fe6060f1SDimitry Andric   auto it = find_if(platformInfos, [&](const PlatformInfo &info) {
161fe6060f1SDimitry Andric     return removeSimulator(info.target.Platform) ==
162fe6060f1SDimitry Andric            removeSimulator(config->platform());
163fe6060f1SDimitry Andric   });
164fe6060f1SDimitry Andric   if (it == platformInfos.end()) {
165fe6060f1SDimitry Andric     std::string platformNames;
166fe6060f1SDimitry Andric     raw_string_ostream os(platformNames);
167fe6060f1SDimitry Andric     interleave(
168fe6060f1SDimitry Andric         platformInfos, os,
169fe6060f1SDimitry Andric         [&](const PlatformInfo &info) {
170fe6060f1SDimitry Andric           os << getPlatformName(info.target.Platform);
171fe6060f1SDimitry Andric         },
172fe6060f1SDimitry Andric         "/");
173fe6060f1SDimitry Andric     error(toString(input) + " has platform " + platformNames +
174fe6060f1SDimitry Andric           Twine(", which is different from target platform ") +
175fe6060f1SDimitry Andric           getPlatformName(config->platform()));
176fe6060f1SDimitry Andric     return false;
177fe6060f1SDimitry Andric   }
178fe6060f1SDimitry Andric 
17906c3fb27SDimitry Andric   if (it->target.MinDeployment > config->platformInfo.target.MinDeployment)
18006c3fb27SDimitry Andric     warn(toString(input) + " has version " +
18106c3fb27SDimitry Andric          it->target.MinDeployment.getAsString() +
182fe6060f1SDimitry Andric          ", which is newer than target minimum of " +
18306c3fb27SDimitry Andric          config->platformInfo.target.MinDeployment.getAsString());
184fe6060f1SDimitry Andric 
185fe6060f1SDimitry Andric   return true;
186fe6060f1SDimitry Andric }
187fe6060f1SDimitry Andric 
188*5f757f3fSDimitry Andric template <class Header>
compatWithTargetArch(const InputFile * file,const Header * hdr)189*5f757f3fSDimitry Andric static bool compatWithTargetArch(const InputFile *file, const Header *hdr) {
190*5f757f3fSDimitry Andric   uint32_t cpuType;
191*5f757f3fSDimitry Andric   std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(config->arch());
192*5f757f3fSDimitry Andric 
193*5f757f3fSDimitry Andric   if (hdr->cputype != cpuType) {
194*5f757f3fSDimitry Andric     Architecture arch =
195*5f757f3fSDimitry Andric         getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
196*5f757f3fSDimitry Andric     auto msg = config->errorForArchMismatch
197*5f757f3fSDimitry Andric                    ? static_cast<void (*)(const Twine &)>(error)
198*5f757f3fSDimitry Andric                    : warn;
199*5f757f3fSDimitry Andric 
200*5f757f3fSDimitry Andric     msg(toString(file) + " has architecture " + getArchitectureName(arch) +
201*5f757f3fSDimitry Andric         " which is incompatible with target architecture " +
202*5f757f3fSDimitry Andric         getArchitectureName(config->arch()));
203*5f757f3fSDimitry Andric     return false;
204*5f757f3fSDimitry Andric   }
205*5f757f3fSDimitry Andric 
206*5f757f3fSDimitry Andric   return checkCompatibility(file);
207*5f757f3fSDimitry Andric }
208*5f757f3fSDimitry Andric 
209349cc55cSDimitry Andric // This cache mostly exists to store system libraries (and .tbds) as they're
210349cc55cSDimitry Andric // loaded, rather than the input archives, which are already cached at a higher
211349cc55cSDimitry Andric // level, and other files like the filelist that are only read once.
212349cc55cSDimitry Andric // Theoretically this caching could be more efficient by hoisting it, but that
213349cc55cSDimitry Andric // would require altering many callers to track the state.
214349cc55cSDimitry Andric DenseMap<CachedHashStringRef, MemoryBufferRef> macho::cachedReads;
2155ffd83dbSDimitry Andric // Open a given file path and return it as a memory-mapped file.
readFile(StringRef path)216bdd1243dSDimitry Andric std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
217349cc55cSDimitry Andric   CachedHashStringRef key(path);
218349cc55cSDimitry Andric   auto entry = cachedReads.find(key);
219349cc55cSDimitry Andric   if (entry != cachedReads.end())
220349cc55cSDimitry Andric     return entry->second;
221349cc55cSDimitry Andric 
222fe6060f1SDimitry Andric   ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
223fe6060f1SDimitry Andric   if (std::error_code ec = mbOrErr.getError()) {
2245ffd83dbSDimitry Andric     error("cannot open " + path + ": " + ec.message());
225bdd1243dSDimitry Andric     return std::nullopt;
2265ffd83dbSDimitry Andric   }
2275ffd83dbSDimitry Andric 
2285ffd83dbSDimitry Andric   std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
2295ffd83dbSDimitry Andric   MemoryBufferRef mbref = mb->getMemBufferRef();
2305ffd83dbSDimitry Andric   make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take mb ownership
2315ffd83dbSDimitry Andric 
2325ffd83dbSDimitry Andric   // If this is a regular non-fat file, return it.
2335ffd83dbSDimitry Andric   const char *buf = mbref.getBufferStart();
234fe6060f1SDimitry Andric   const auto *hdr = reinterpret_cast<const fat_header *>(buf);
235fe6060f1SDimitry Andric   if (mbref.getBufferSize() < sizeof(uint32_t) ||
236fe6060f1SDimitry Andric       read32be(&hdr->magic) != FAT_MAGIC) {
237e8d8bef9SDimitry Andric     if (tar)
238e8d8bef9SDimitry Andric       tar->append(relativeToRoot(path), mbref.getBuffer());
239349cc55cSDimitry Andric     return cachedReads[key] = mbref;
240e8d8bef9SDimitry Andric   }
2415ffd83dbSDimitry Andric 
24204eeddc0SDimitry Andric   llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
24304eeddc0SDimitry Andric 
244fe6060f1SDimitry Andric   // Object files and archive files may be fat files, which contain multiple
245fe6060f1SDimitry Andric   // real files for different CPU ISAs. Here, we search for a file that matches
246fe6060f1SDimitry Andric   // with the current link target and returns it as a MemoryBufferRef.
247fe6060f1SDimitry Andric   const auto *arch = reinterpret_cast<const fat_arch *>(buf + sizeof(*hdr));
248bdd1243dSDimitry Andric   auto getArchName = [](uint32_t cpuType, uint32_t cpuSubtype) {
249bdd1243dSDimitry Andric     return getArchitectureName(getArchitectureFromCpuType(cpuType, cpuSubtype));
250bdd1243dSDimitry Andric   };
2515ffd83dbSDimitry Andric 
252bdd1243dSDimitry Andric   std::vector<StringRef> archs;
2535ffd83dbSDimitry Andric   for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) {
2545ffd83dbSDimitry Andric     if (reinterpret_cast<const char *>(arch + i + 1) >
2555ffd83dbSDimitry Andric         buf + mbref.getBufferSize()) {
2565ffd83dbSDimitry Andric       error(path + ": fat_arch struct extends beyond end of file");
257bdd1243dSDimitry Andric       return std::nullopt;
2585ffd83dbSDimitry Andric     }
2595ffd83dbSDimitry Andric 
260bdd1243dSDimitry Andric     uint32_t cpuType = read32be(&arch[i].cputype);
261bdd1243dSDimitry Andric     uint32_t cpuSubtype =
262bdd1243dSDimitry Andric         read32be(&arch[i].cpusubtype) & ~MachO::CPU_SUBTYPE_MASK;
263bdd1243dSDimitry Andric 
264bdd1243dSDimitry Andric     // FIXME: LD64 has a more complex fallback logic here.
265bdd1243dSDimitry Andric     // Consider implementing that as well?
266bdd1243dSDimitry Andric     if (cpuType != static_cast<uint32_t>(target->cpuType) ||
267bdd1243dSDimitry Andric         cpuSubtype != target->cpuSubtype) {
268bdd1243dSDimitry Andric       archs.emplace_back(getArchName(cpuType, cpuSubtype));
2695ffd83dbSDimitry Andric       continue;
270bdd1243dSDimitry Andric     }
2715ffd83dbSDimitry Andric 
2725ffd83dbSDimitry Andric     uint32_t offset = read32be(&arch[i].offset);
2735ffd83dbSDimitry Andric     uint32_t size = read32be(&arch[i].size);
2745ffd83dbSDimitry Andric     if (offset + size > mbref.getBufferSize())
2755ffd83dbSDimitry Andric       error(path + ": slice extends beyond end of file");
276e8d8bef9SDimitry Andric     if (tar)
277e8d8bef9SDimitry Andric       tar->append(relativeToRoot(path), mbref.getBuffer());
278349cc55cSDimitry Andric     return cachedReads[key] = MemoryBufferRef(StringRef(buf + offset, size),
279349cc55cSDimitry Andric                                               path.copy(bAlloc));
2805ffd83dbSDimitry Andric   }
2815ffd83dbSDimitry Andric 
282bdd1243dSDimitry Andric   auto targetArchName = getArchName(target->cpuType, target->cpuSubtype);
283bdd1243dSDimitry Andric   warn(path + ": ignoring file because it is universal (" + join(archs, ",") +
284bdd1243dSDimitry Andric        ") but does not contain the " + targetArchName + " architecture");
285bdd1243dSDimitry Andric   return std::nullopt;
2865ffd83dbSDimitry Andric }
2875ffd83dbSDimitry Andric 
InputFile(Kind kind,const InterfaceFile & interface)288fe6060f1SDimitry Andric InputFile::InputFile(Kind kind, const InterfaceFile &interface)
28904eeddc0SDimitry Andric     : id(idCount++), fileKind(kind), name(saver().save(interface.getPath())) {}
2905ffd83dbSDimitry Andric 
291349cc55cSDimitry Andric // Some sections comprise of fixed-size records, so instead of splitting them at
292349cc55cSDimitry Andric // symbol boundaries, we split them based on size. Records are distinct from
293349cc55cSDimitry Andric // literals in that they may contain references to other sections, instead of
294349cc55cSDimitry Andric // being leaf nodes in the InputSection graph.
295349cc55cSDimitry Andric //
296349cc55cSDimitry Andric // Note that "record" is a term I came up with. In contrast, "literal" is a term
297349cc55cSDimitry Andric // used by the Mach-O format.
getRecordSize(StringRef segname,StringRef name)298bdd1243dSDimitry Andric static std::optional<size_t> getRecordSize(StringRef segname, StringRef name) {
29981ad6265SDimitry Andric   if (name == section_names::compactUnwind) {
300349cc55cSDimitry Andric     if (segname == segment_names::ld)
301349cc55cSDimitry Andric       return target->wordSize == 8 ? 32 : 20;
302349cc55cSDimitry Andric   }
303bdd1243dSDimitry Andric   if (!config->dedupStrings)
304349cc55cSDimitry Andric     return {};
30581ad6265SDimitry Andric 
30681ad6265SDimitry Andric   if (name == section_names::cfString && segname == segment_names::data)
30781ad6265SDimitry Andric     return target->wordSize == 8 ? 32 : 16;
308fcaf7f86SDimitry Andric 
309fcaf7f86SDimitry Andric   if (config->icfLevel == ICFLevel::none)
310fcaf7f86SDimitry Andric     return {};
311fcaf7f86SDimitry Andric 
31281ad6265SDimitry Andric   if (name == section_names::objcClassRefs && segname == segment_names::data)
31381ad6265SDimitry Andric     return target->wordSize;
314bdd1243dSDimitry Andric 
315bdd1243dSDimitry Andric   if (name == section_names::objcSelrefs && segname == segment_names::data)
316bdd1243dSDimitry Andric     return target->wordSize;
31781ad6265SDimitry Andric   return {};
31881ad6265SDimitry Andric }
31981ad6265SDimitry Andric 
parseCallGraph(ArrayRef<uint8_t> data,std::vector<CallGraphEntry> & callGraph)32081ad6265SDimitry Andric static Error parseCallGraph(ArrayRef<uint8_t> data,
32181ad6265SDimitry Andric                             std::vector<CallGraphEntry> &callGraph) {
32281ad6265SDimitry Andric   TimeTraceScope timeScope("Parsing call graph section");
323*5f757f3fSDimitry Andric   BinaryStreamReader reader(data, llvm::endianness::little);
32481ad6265SDimitry Andric   while (!reader.empty()) {
32581ad6265SDimitry Andric     uint32_t fromIndex, toIndex;
32681ad6265SDimitry Andric     uint64_t count;
32781ad6265SDimitry Andric     if (Error err = reader.readInteger(fromIndex))
32881ad6265SDimitry Andric       return err;
32981ad6265SDimitry Andric     if (Error err = reader.readInteger(toIndex))
33081ad6265SDimitry Andric       return err;
33181ad6265SDimitry Andric     if (Error err = reader.readInteger(count))
33281ad6265SDimitry Andric       return err;
33381ad6265SDimitry Andric     callGraph.emplace_back(fromIndex, toIndex, count);
33481ad6265SDimitry Andric   }
33581ad6265SDimitry Andric   return Error::success();
336349cc55cSDimitry Andric }
337349cc55cSDimitry Andric 
338349cc55cSDimitry Andric // Parse the sequence of sections within a single LC_SEGMENT(_64).
339349cc55cSDimitry Andric // Split each section into subsections.
340349cc55cSDimitry Andric template <class SectionHeader>
parseSections(ArrayRef<SectionHeader> sectionHeaders)341349cc55cSDimitry Andric void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
342349cc55cSDimitry Andric   sections.reserve(sectionHeaders.size());
3435ffd83dbSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
3445ffd83dbSDimitry Andric 
345349cc55cSDimitry Andric   for (const SectionHeader &sec : sectionHeaders) {
346fe6060f1SDimitry Andric     StringRef name =
347e8d8bef9SDimitry Andric         StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
348fe6060f1SDimitry Andric     StringRef segname =
349e8d8bef9SDimitry Andric         StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
35081ad6265SDimitry Andric     sections.push_back(make<Section>(this, segname, name, sec.flags, sec.addr));
351fe6060f1SDimitry Andric     if (sec.align >= 32) {
352fe6060f1SDimitry Andric       error("alignment " + std::to_string(sec.align) + " of section " + name +
353fe6060f1SDimitry Andric             " is too large");
354fe6060f1SDimitry Andric       continue;
355fe6060f1SDimitry Andric     }
35681ad6265SDimitry Andric     Section &section = *sections.back();
357fe6060f1SDimitry Andric     uint32_t align = 1 << sec.align;
35881ad6265SDimitry Andric     ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
35981ad6265SDimitry Andric                                                     : buf + sec.offset,
36081ad6265SDimitry Andric                               static_cast<size_t>(sec.size)};
361e8d8bef9SDimitry Andric 
362bdd1243dSDimitry Andric     auto splitRecords = [&](size_t recordSize) -> void {
363349cc55cSDimitry Andric       if (data.empty())
364349cc55cSDimitry Andric         return;
36581ad6265SDimitry Andric       Subsections &subsections = section.subsections;
366349cc55cSDimitry Andric       subsections.reserve(data.size() / recordSize);
36781ad6265SDimitry Andric       for (uint64_t off = 0; off < data.size(); off += recordSize) {
368349cc55cSDimitry Andric         auto *isec = make<ConcatInputSection>(
369bdd1243dSDimitry Andric             section, data.slice(off, std::min(data.size(), recordSize)), align);
37081ad6265SDimitry Andric         subsections.push_back({off, isec});
371349cc55cSDimitry Andric       }
37281ad6265SDimitry Andric       section.doneSplitting = true;
373349cc55cSDimitry Andric     };
374349cc55cSDimitry Andric 
375fe6060f1SDimitry Andric     if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
376bdd1243dSDimitry Andric       if (sec.nreloc)
377bdd1243dSDimitry Andric         fatal(toString(this) + ": " + sec.segname + "," + sec.sectname +
378bdd1243dSDimitry Andric               " contains relocations, which is unsupported");
379bdd1243dSDimitry Andric       bool dedupLiterals =
380bdd1243dSDimitry Andric           name == section_names::objcMethname || config->dedupStrings;
381bdd1243dSDimitry Andric       InputSection *isec =
382bdd1243dSDimitry Andric           make<CStringInputSection>(section, data, align, dedupLiterals);
383fe6060f1SDimitry Andric       // FIXME: parallelize this?
384fe6060f1SDimitry Andric       cast<CStringInputSection>(isec)->splitIntoPieces();
385bdd1243dSDimitry Andric       section.subsections.push_back({0, isec});
386bdd1243dSDimitry Andric     } else if (isWordLiteralSection(sec.flags)) {
387bdd1243dSDimitry Andric       if (sec.nreloc)
388bdd1243dSDimitry Andric         fatal(toString(this) + ": " + sec.segname + "," + sec.sectname +
389bdd1243dSDimitry Andric               " contains relocations, which is unsupported");
390bdd1243dSDimitry Andric       InputSection *isec = make<WordLiteralInputSection>(section, data, align);
39181ad6265SDimitry Andric       section.subsections.push_back({0, isec});
392349cc55cSDimitry Andric     } else if (auto recordSize = getRecordSize(segname, name)) {
393349cc55cSDimitry Andric       splitRecords(*recordSize);
394753f127fSDimitry Andric     } else if (name == section_names::ehFrame &&
39581ad6265SDimitry Andric                segname == segment_names::text) {
39681ad6265SDimitry Andric       splitEhFrames(data, *sections.back());
397349cc55cSDimitry Andric     } else if (segname == segment_names::llvm) {
39881ad6265SDimitry Andric       if (config->callGraphProfileSort && name == section_names::cgProfile)
39981ad6265SDimitry Andric         checkError(parseCallGraph(data, callGraph));
400349cc55cSDimitry Andric       // ld64 does not appear to emit contents from sections within the __LLVM
401349cc55cSDimitry Andric       // segment. Symbols within those sections point to bitcode metadata
402349cc55cSDimitry Andric       // instead of actual symbols. Global symbols within those sections could
40381ad6265SDimitry Andric       // have the same name without causing duplicate symbol errors. To avoid
40481ad6265SDimitry Andric       // spurious duplicate symbol errors, we do not parse these sections.
405349cc55cSDimitry Andric       // TODO: Evaluate whether the bitcode metadata is needed.
406fcaf7f86SDimitry Andric     } else if (name == section_names::objCImageInfo &&
407fcaf7f86SDimitry Andric                segname == segment_names::data) {
408fcaf7f86SDimitry Andric       objCImageInfo = data;
409fe6060f1SDimitry Andric     } else {
41081ad6265SDimitry Andric       if (name == section_names::addrSig)
41181ad6265SDimitry Andric         addrSigSection = sections.back();
41281ad6265SDimitry Andric 
41381ad6265SDimitry Andric       auto *isec = make<ConcatInputSection>(section, data, align);
414349cc55cSDimitry Andric       if (isDebugSection(isec->getFlags()) &&
415349cc55cSDimitry Andric           isec->getSegName() == segment_names::dwarf) {
416e8d8bef9SDimitry Andric         // Instead of emitting DWARF sections, we emit STABS symbols to the
417e8d8bef9SDimitry Andric         // object files that contain them. We filter them out early to avoid
41881ad6265SDimitry Andric         // parsing their relocations unnecessarily.
419e8d8bef9SDimitry Andric         debugSections.push_back(isec);
420349cc55cSDimitry Andric       } else {
42181ad6265SDimitry Andric         section.subsections.push_back({0, isec});
422e8d8bef9SDimitry Andric       }
4235ffd83dbSDimitry Andric     }
4245ffd83dbSDimitry Andric   }
425fe6060f1SDimitry Andric }
4265ffd83dbSDimitry Andric 
splitEhFrames(ArrayRef<uint8_t> data,Section & ehFrameSection)42781ad6265SDimitry Andric void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
42861cfbce3SDimitry Andric   EhReader reader(this, data, /*dataOff=*/0);
42981ad6265SDimitry Andric   size_t off = 0;
43081ad6265SDimitry Andric   while (off < reader.size()) {
43181ad6265SDimitry Andric     uint64_t frameOff = off;
43281ad6265SDimitry Andric     uint64_t length = reader.readLength(&off);
43381ad6265SDimitry Andric     if (length == 0)
43481ad6265SDimitry Andric       break;
43581ad6265SDimitry Andric     uint64_t fullLength = length + (off - frameOff);
43681ad6265SDimitry Andric     off += length;
43781ad6265SDimitry Andric     // We hard-code an alignment of 1 here because we don't actually want our
43881ad6265SDimitry Andric     // EH frames to be aligned to the section alignment. EH frame decoders don't
43981ad6265SDimitry Andric     // expect this alignment. Moreover, each EH frame must start where the
44081ad6265SDimitry Andric     // previous one ends, and where it ends is indicated by the length field.
44181ad6265SDimitry Andric     // Unless we update the length field (troublesome), we should keep the
44281ad6265SDimitry Andric     // alignment to 1.
44381ad6265SDimitry Andric     // Note that we still want to preserve the alignment of the overall section,
44481ad6265SDimitry Andric     // just not of the individual EH frames.
44581ad6265SDimitry Andric     ehFrameSection.subsections.push_back(
44681ad6265SDimitry Andric         {frameOff, make<ConcatInputSection>(ehFrameSection,
44781ad6265SDimitry Andric                                             data.slice(frameOff, fullLength),
44881ad6265SDimitry Andric                                             /*align=*/1)});
44981ad6265SDimitry Andric   }
45081ad6265SDimitry Andric   ehFrameSection.doneSplitting = true;
45181ad6265SDimitry Andric }
45281ad6265SDimitry Andric 
45381ad6265SDimitry Andric template <class T>
findContainingSection(const std::vector<Section * > & sections,T * offset)45481ad6265SDimitry Andric static Section *findContainingSection(const std::vector<Section *> &sections,
45581ad6265SDimitry Andric                                       T *offset) {
45681ad6265SDimitry Andric   static_assert(std::is_same<uint64_t, T>::value ||
45781ad6265SDimitry Andric                     std::is_same<uint32_t, T>::value,
45881ad6265SDimitry Andric                 "unexpected type for offset");
45981ad6265SDimitry Andric   auto it = std::prev(llvm::upper_bound(
46081ad6265SDimitry Andric       sections, *offset,
46181ad6265SDimitry Andric       [](uint64_t value, const Section *sec) { return value < sec->addr; }));
46281ad6265SDimitry Andric   *offset -= (*it)->addr;
46381ad6265SDimitry Andric   return *it;
46481ad6265SDimitry Andric }
46581ad6265SDimitry Andric 
4665ffd83dbSDimitry Andric // Find the subsection corresponding to the greatest section offset that is <=
4675ffd83dbSDimitry Andric // that of the given offset.
4685ffd83dbSDimitry Andric //
4695ffd83dbSDimitry Andric // offset: an offset relative to the start of the original InputSection (before
4705ffd83dbSDimitry Andric // any subsection splitting has occurred). It will be updated to represent the
4715ffd83dbSDimitry Andric // same location as an offset relative to the start of the containing
4725ffd83dbSDimitry Andric // subsection.
473349cc55cSDimitry Andric template <class T>
findContainingSubsection(const Section & section,T * offset)47481ad6265SDimitry Andric static InputSection *findContainingSubsection(const Section &section,
475349cc55cSDimitry Andric                                               T *offset) {
476349cc55cSDimitry Andric   static_assert(std::is_same<uint64_t, T>::value ||
477349cc55cSDimitry Andric                     std::is_same<uint32_t, T>::value,
478349cc55cSDimitry Andric                 "unexpected type for offset");
479fe6060f1SDimitry Andric   auto it = std::prev(llvm::upper_bound(
48081ad6265SDimitry Andric       section.subsections, *offset,
481349cc55cSDimitry Andric       [](uint64_t value, Subsection subsec) { return value < subsec.offset; }));
482fe6060f1SDimitry Andric   *offset -= it->offset;
483fe6060f1SDimitry Andric   return it->isec;
4845ffd83dbSDimitry Andric }
4855ffd83dbSDimitry Andric 
48681ad6265SDimitry Andric // Find a symbol at offset `off` within `isec`.
findSymbolAtOffset(const ConcatInputSection * isec,uint64_t off)48781ad6265SDimitry Andric static Defined *findSymbolAtOffset(const ConcatInputSection *isec,
48881ad6265SDimitry Andric                                    uint64_t off) {
48981ad6265SDimitry Andric   auto it = llvm::lower_bound(isec->symbols, off, [](Defined *d, uint64_t off) {
49081ad6265SDimitry Andric     return d->value < off;
49181ad6265SDimitry Andric   });
49281ad6265SDimitry Andric   // The offset should point at the exact address of a symbol (with no addend.)
49381ad6265SDimitry Andric   if (it == isec->symbols.end() || (*it)->value != off) {
49481ad6265SDimitry Andric     assert(isec->wasCoalesced);
49581ad6265SDimitry Andric     return nullptr;
49681ad6265SDimitry Andric   }
49781ad6265SDimitry Andric   return *it;
49881ad6265SDimitry Andric }
49981ad6265SDimitry Andric 
500349cc55cSDimitry Andric template <class SectionHeader>
validateRelocationInfo(InputFile * file,const SectionHeader & sec,relocation_info rel)501349cc55cSDimitry Andric static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
502fe6060f1SDimitry Andric                                    relocation_info rel) {
503fe6060f1SDimitry Andric   const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
504fe6060f1SDimitry Andric   bool valid = true;
505fe6060f1SDimitry Andric   auto message = [relocAttrs, file, sec, rel, &valid](const Twine &diagnostic) {
506fe6060f1SDimitry Andric     valid = false;
507fe6060f1SDimitry Andric     return (relocAttrs.name + " relocation " + diagnostic + " at offset " +
508fe6060f1SDimitry Andric             std::to_string(rel.r_address) + " of " + sec.segname + "," +
509fe6060f1SDimitry Andric             sec.sectname + " in " + toString(file))
510fe6060f1SDimitry Andric         .str();
511fe6060f1SDimitry Andric   };
512fe6060f1SDimitry Andric 
513fe6060f1SDimitry Andric   if (!relocAttrs.hasAttr(RelocAttrBits::LOCAL) && !rel.r_extern)
514fe6060f1SDimitry Andric     error(message("must be extern"));
515fe6060f1SDimitry Andric   if (relocAttrs.hasAttr(RelocAttrBits::PCREL) != rel.r_pcrel)
516fe6060f1SDimitry Andric     error(message(Twine("must ") + (rel.r_pcrel ? "not " : "") +
517fe6060f1SDimitry Andric                   "be PC-relative"));
518fe6060f1SDimitry Andric   if (isThreadLocalVariables(sec.flags) &&
519fe6060f1SDimitry Andric       !relocAttrs.hasAttr(RelocAttrBits::UNSIGNED))
520fe6060f1SDimitry Andric     error(message("not allowed in thread-local section, must be UNSIGNED"));
521fe6060f1SDimitry Andric   if (rel.r_length < 2 || rel.r_length > 3 ||
522fe6060f1SDimitry Andric       !relocAttrs.hasAttr(static_cast<RelocAttrBits>(1 << rel.r_length))) {
523fe6060f1SDimitry Andric     static SmallVector<StringRef, 4> widths{"0", "4", "8", "4 or 8"};
524fe6060f1SDimitry Andric     error(message("has width " + std::to_string(1 << rel.r_length) +
525fe6060f1SDimitry Andric                   " bytes, but must be " +
526fe6060f1SDimitry Andric                   widths[(static_cast<int>(relocAttrs.bits) >> 2) & 3] +
527fe6060f1SDimitry Andric                   " bytes"));
528fe6060f1SDimitry Andric   }
529fe6060f1SDimitry Andric   return valid;
530fe6060f1SDimitry Andric }
531fe6060f1SDimitry Andric 
532349cc55cSDimitry Andric template <class SectionHeader>
parseRelocations(ArrayRef<SectionHeader> sectionHeaders,const SectionHeader & sec,Section & section)533349cc55cSDimitry Andric void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
53481ad6265SDimitry Andric                                const SectionHeader &sec, Section &section) {
5355ffd83dbSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
536e8d8bef9SDimitry Andric   ArrayRef<relocation_info> relInfos(
537e8d8bef9SDimitry Andric       reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
5385ffd83dbSDimitry Andric 
53981ad6265SDimitry Andric   Subsections &subsections = section.subsections;
540349cc55cSDimitry Andric   auto subsecIt = subsections.rbegin();
541e8d8bef9SDimitry Andric   for (size_t i = 0; i < relInfos.size(); i++) {
542e8d8bef9SDimitry Andric     // Paired relocations serve as Mach-O's method for attaching a
543e8d8bef9SDimitry Andric     // supplemental datum to a primary relocation record. ELF does not
544e8d8bef9SDimitry Andric     // need them because the *_RELOC_RELA records contain the extra
545e8d8bef9SDimitry Andric     // addend field, vs. *_RELOC_REL which omit the addend.
546e8d8bef9SDimitry Andric     //
547e8d8bef9SDimitry Andric     // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
548e8d8bef9SDimitry Andric     // and the paired *_RELOC_UNSIGNED record holds the minuend. The
549fe6060f1SDimitry Andric     // datum for each is a symbolic address. The result is the offset
550fe6060f1SDimitry Andric     // between two addresses.
551e8d8bef9SDimitry Andric     //
552e8d8bef9SDimitry Andric     // The ARM64_RELOC_ADDEND record holds the addend, and the paired
553e8d8bef9SDimitry Andric     // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
554e8d8bef9SDimitry Andric     // base symbolic address.
555e8d8bef9SDimitry Andric     //
556bdd1243dSDimitry Andric     // Note: X86 does not use *_RELOC_ADDEND because it can embed an addend into
557bdd1243dSDimitry Andric     // the instruction stream. On X86, a relocatable address field always
558bdd1243dSDimitry Andric     // occupies an entire contiguous sequence of byte(s), so there is no need to
559bdd1243dSDimitry Andric     // merge opcode bits with address bits. Therefore, it's easy and convenient
560bdd1243dSDimitry Andric     // to store addends in the instruction-stream bytes that would otherwise
561bdd1243dSDimitry Andric     // contain zeroes. By contrast, RISC ISAs such as ARM64 mix opcode bits with
562bdd1243dSDimitry Andric     // address bits so that bitwise arithmetic is necessary to extract and
563bdd1243dSDimitry Andric     // insert them. Storing addends in the instruction stream is possible, but
564bdd1243dSDimitry Andric     // inconvenient and more costly at link time.
565e8d8bef9SDimitry Andric 
566fe6060f1SDimitry Andric     relocation_info relInfo = relInfos[i];
567349cc55cSDimitry Andric     bool isSubtrahend =
568349cc55cSDimitry Andric         target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
569349cc55cSDimitry Andric     int64_t pairedAddend = 0;
570fe6060f1SDimitry Andric     if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
571fe6060f1SDimitry Andric       pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
572fe6060f1SDimitry Andric       relInfo = relInfos[++i];
573fe6060f1SDimitry Andric     }
574e8d8bef9SDimitry Andric     assert(i < relInfos.size());
575fe6060f1SDimitry Andric     if (!validateRelocationInfo(this, sec, relInfo))
576fe6060f1SDimitry Andric       continue;
577e8d8bef9SDimitry Andric     if (relInfo.r_address & R_SCATTERED)
5785ffd83dbSDimitry Andric       fatal("TODO: Scattered relocations not supported");
5795ffd83dbSDimitry Andric 
580fe6060f1SDimitry Andric     int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
581fe6060f1SDimitry Andric     assert(!(embeddedAddend && pairedAddend));
582fe6060f1SDimitry Andric     int64_t totalAddend = pairedAddend + embeddedAddend;
5835ffd83dbSDimitry Andric     Reloc r;
584e8d8bef9SDimitry Andric     r.type = relInfo.r_type;
585e8d8bef9SDimitry Andric     r.pcrel = relInfo.r_pcrel;
586e8d8bef9SDimitry Andric     r.length = relInfo.r_length;
587e8d8bef9SDimitry Andric     r.offset = relInfo.r_address;
588e8d8bef9SDimitry Andric     if (relInfo.r_extern) {
589e8d8bef9SDimitry Andric       r.referent = symbols[relInfo.r_symbolnum];
590fe6060f1SDimitry Andric       r.addend = isSubtrahend ? 0 : totalAddend;
5915ffd83dbSDimitry Andric     } else {
592fe6060f1SDimitry Andric       assert(!isSubtrahend);
593349cc55cSDimitry Andric       const SectionHeader &referentSecHead =
594349cc55cSDimitry Andric           sectionHeaders[relInfo.r_symbolnum - 1];
595fe6060f1SDimitry Andric       uint64_t referentOffset;
596e8d8bef9SDimitry Andric       if (relInfo.r_pcrel) {
5975ffd83dbSDimitry Andric         // The implicit addend for pcrel section relocations is the pcrel offset
5985ffd83dbSDimitry Andric         // in terms of the addresses in the input file. Here we adjust it so
599e8d8bef9SDimitry Andric         // that it describes the offset from the start of the referent section.
600fe6060f1SDimitry Andric         // FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
601fe6060f1SDimitry Andric         // have pcrel section relocations. We may want to factor this out into
602fe6060f1SDimitry Andric         // the arch-specific .cpp file.
603fe6060f1SDimitry Andric         assert(target->hasAttr(r.type, RelocAttrBits::BYTE4));
604349cc55cSDimitry Andric         referentOffset = sec.addr + relInfo.r_address + 4 + totalAddend -
605349cc55cSDimitry Andric                          referentSecHead.addr;
6065ffd83dbSDimitry Andric       } else {
6075ffd83dbSDimitry Andric         // The addend for a non-pcrel relocation is its absolute address.
608349cc55cSDimitry Andric         referentOffset = totalAddend - referentSecHead.addr;
6095ffd83dbSDimitry Andric       }
61081ad6265SDimitry Andric       r.referent = findContainingSubsection(*sections[relInfo.r_symbolnum - 1],
61181ad6265SDimitry Andric                                             &referentOffset);
612e8d8bef9SDimitry Andric       r.addend = referentOffset;
6135ffd83dbSDimitry Andric     }
6145ffd83dbSDimitry Andric 
615fe6060f1SDimitry Andric     // Find the subsection that this relocation belongs to.
616fe6060f1SDimitry Andric     // Though not required by the Mach-O format, clang and gcc seem to emit
617fe6060f1SDimitry Andric     // relocations in order, so let's take advantage of it. However, ld64 emits
618fe6060f1SDimitry Andric     // unsorted relocations (in `-r` mode), so we have a fallback for that
619fe6060f1SDimitry Andric     // uncommon case.
620fe6060f1SDimitry Andric     InputSection *subsec;
621349cc55cSDimitry Andric     while (subsecIt != subsections.rend() && subsecIt->offset > r.offset)
622fe6060f1SDimitry Andric       ++subsecIt;
623349cc55cSDimitry Andric     if (subsecIt == subsections.rend() ||
624fe6060f1SDimitry Andric         subsecIt->offset + subsecIt->isec->getSize() <= r.offset) {
62581ad6265SDimitry Andric       subsec = findContainingSubsection(section, &r.offset);
626fe6060f1SDimitry Andric       // Now that we know the relocs are unsorted, avoid trying the 'fast path'
627fe6060f1SDimitry Andric       // for the other relocations.
628349cc55cSDimitry Andric       subsecIt = subsections.rend();
629fe6060f1SDimitry Andric     } else {
630fe6060f1SDimitry Andric       subsec = subsecIt->isec;
631fe6060f1SDimitry Andric       r.offset -= subsecIt->offset;
632fe6060f1SDimitry Andric     }
6335ffd83dbSDimitry Andric     subsec->relocs.push_back(r);
634fe6060f1SDimitry Andric 
635fe6060f1SDimitry Andric     if (isSubtrahend) {
636fe6060f1SDimitry Andric       relocation_info minuendInfo = relInfos[++i];
637fe6060f1SDimitry Andric       // SUBTRACTOR relocations should always be followed by an UNSIGNED one
638fe6060f1SDimitry Andric       // attached to the same address.
639fe6060f1SDimitry Andric       assert(target->hasAttr(minuendInfo.r_type, RelocAttrBits::UNSIGNED) &&
640fe6060f1SDimitry Andric              relInfo.r_address == minuendInfo.r_address);
641fe6060f1SDimitry Andric       Reloc p;
642fe6060f1SDimitry Andric       p.type = minuendInfo.r_type;
643fe6060f1SDimitry Andric       if (minuendInfo.r_extern) {
644fe6060f1SDimitry Andric         p.referent = symbols[minuendInfo.r_symbolnum];
645fe6060f1SDimitry Andric         p.addend = totalAddend;
646fe6060f1SDimitry Andric       } else {
647fe6060f1SDimitry Andric         uint64_t referentOffset =
648fe6060f1SDimitry Andric             totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr;
64981ad6265SDimitry Andric         p.referent = findContainingSubsection(
65081ad6265SDimitry Andric             *sections[minuendInfo.r_symbolnum - 1], &referentOffset);
651fe6060f1SDimitry Andric         p.addend = referentOffset;
652fe6060f1SDimitry Andric       }
653fe6060f1SDimitry Andric       subsec->relocs.push_back(p);
654fe6060f1SDimitry Andric     }
6555ffd83dbSDimitry Andric   }
6565ffd83dbSDimitry Andric }
6575ffd83dbSDimitry Andric 
658fe6060f1SDimitry Andric template <class NList>
createDefined(const NList & sym,StringRef name,InputSection * isec,uint64_t value,uint64_t size,bool forceHidden)659fe6060f1SDimitry Andric static macho::Symbol *createDefined(const NList &sym, StringRef name,
660fe6060f1SDimitry Andric                                     InputSection *isec, uint64_t value,
661972a253aSDimitry Andric                                     uint64_t size, bool forceHidden) {
662e8d8bef9SDimitry Andric   // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
663fe6060f1SDimitry Andric   // N_EXT: Global symbols. These go in the symbol table during the link,
664fe6060f1SDimitry Andric   //        and also in the export table of the output so that the dynamic
665fe6060f1SDimitry Andric   //        linker sees them.
666fe6060f1SDimitry Andric   // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped. These go in the
667fe6060f1SDimitry Andric   //                 symbol table during the link so that duplicates are
668fe6060f1SDimitry Andric   //                 either reported (for non-weak symbols) or merged
669fe6060f1SDimitry Andric   //                 (for weak symbols), but they do not go in the export
670fe6060f1SDimitry Andric   //                 table of the output.
671fe6060f1SDimitry Andric   // N_PEXT: llvm-mc does not emit these, but `ld -r` (wherein ld64 emits
672fe6060f1SDimitry Andric   //         object files) may produce them. LLD does not yet support -r.
673fe6060f1SDimitry Andric   //         These are translation-unit scoped, identical to the `0` case.
674fe6060f1SDimitry Andric   // 0: Translation-unit scoped. These are not in the symbol table during
675fe6060f1SDimitry Andric   //    link, and not in the export table of the output either.
676fe6060f1SDimitry Andric   bool isWeakDefCanBeHidden =
677fe6060f1SDimitry Andric       (sym.n_desc & (N_WEAK_DEF | N_WEAK_REF)) == (N_WEAK_DEF | N_WEAK_REF);
678e8d8bef9SDimitry Andric 
67906c3fb27SDimitry Andric   assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
68006c3fb27SDimitry Andric 
681fe6060f1SDimitry Andric   if (sym.n_type & N_EXT) {
682972a253aSDimitry Andric     // -load_hidden makes us treat global symbols as linkage unit scoped.
683972a253aSDimitry Andric     // Duplicates are reported but the symbol does not go in the export trie.
684972a253aSDimitry Andric     bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden;
685972a253aSDimitry Andric 
686fe6060f1SDimitry Andric     // lld's behavior for merging symbols is slightly different from ld64:
687fe6060f1SDimitry Andric     // ld64 picks the winning symbol based on several criteria (see
688fe6060f1SDimitry Andric     // pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld
689fe6060f1SDimitry Andric     // just merges metadata and keeps the contents of the first symbol
690fe6060f1SDimitry Andric     // with that name (see SymbolTable::addDefined). For:
691fe6060f1SDimitry Andric     // * inline function F in a TU built with -fvisibility-inlines-hidden
692fe6060f1SDimitry Andric     // * and inline function F in another TU built without that flag
693fe6060f1SDimitry Andric     // ld64 will pick the one from the file built without
694fe6060f1SDimitry Andric     // -fvisibility-inlines-hidden.
695fe6060f1SDimitry Andric     // lld will instead pick the one listed first on the link command line and
696fe6060f1SDimitry Andric     // give it visibility as if the function was built without
697fe6060f1SDimitry Andric     // -fvisibility-inlines-hidden.
698fe6060f1SDimitry Andric     // If both functions have the same contents, this will have the same
699fe6060f1SDimitry Andric     // behavior. If not, it won't, but the input had an ODR violation in
700fe6060f1SDimitry Andric     // that case.
701fe6060f1SDimitry Andric     //
702fe6060f1SDimitry Andric     // Similarly, merging a symbol
703fe6060f1SDimitry Andric     // that's isPrivateExtern and not isWeakDefCanBeHidden with one
704fe6060f1SDimitry Andric     // that's not isPrivateExtern but isWeakDefCanBeHidden technically
705fe6060f1SDimitry Andric     // should produce one
706fe6060f1SDimitry Andric     // that's not isPrivateExtern but isWeakDefCanBeHidden. That matters
707fe6060f1SDimitry Andric     // with ld64's semantics, because it means the non-private-extern
708fe6060f1SDimitry Andric     // definition will continue to take priority if more private extern
709fe6060f1SDimitry Andric     // definitions are encountered. With lld's semantics there's no observable
710349cc55cSDimitry Andric     // difference between a symbol that's isWeakDefCanBeHidden(autohide) or one
711349cc55cSDimitry Andric     // that's privateExtern -- neither makes it into the dynamic symbol table,
712349cc55cSDimitry Andric     // unless the autohide symbol is explicitly exported.
713349cc55cSDimitry Andric     // But if a symbol is both privateExtern and autohide then it can't
714349cc55cSDimitry Andric     // be exported.
715349cc55cSDimitry Andric     // So we nullify the autohide flag when privateExtern is present
716349cc55cSDimitry Andric     // and promote the symbol to privateExtern when it is not already.
717349cc55cSDimitry Andric     if (isWeakDefCanBeHidden && isPrivateExtern)
718349cc55cSDimitry Andric       isWeakDefCanBeHidden = false;
719349cc55cSDimitry Andric     else if (isWeakDefCanBeHidden)
720fe6060f1SDimitry Andric       isPrivateExtern = true;
721fe6060f1SDimitry Andric     return symtab->addDefined(
722fe6060f1SDimitry Andric         name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
72306c3fb27SDimitry Andric         isPrivateExtern, sym.n_desc & REFERENCED_DYNAMICALLY,
72406c3fb27SDimitry Andric         sym.n_desc & N_NO_DEAD_STRIP, isWeakDefCanBeHidden);
725e8d8bef9SDimitry Andric   }
726bdd1243dSDimitry Andric   bool includeInSymtab = !isPrivateLabel(name) && !isEhFrameSection(isec);
727fe6060f1SDimitry Andric   return make<Defined>(
728fe6060f1SDimitry Andric       name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
72981ad6265SDimitry Andric       /*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
73006c3fb27SDimitry Andric       sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
731e8d8bef9SDimitry Andric }
732e8d8bef9SDimitry Andric 
733e8d8bef9SDimitry Andric // Absolute symbols are defined symbols that do not have an associated
734e8d8bef9SDimitry Andric // InputSection. They cannot be weak.
735fe6060f1SDimitry Andric template <class NList>
createAbsolute(const NList & sym,InputFile * file,StringRef name,bool forceHidden)736fe6060f1SDimitry Andric static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
737972a253aSDimitry Andric                                      StringRef name, bool forceHidden) {
73806c3fb27SDimitry Andric   assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
73906c3fb27SDimitry Andric 
740fe6060f1SDimitry Andric   if (sym.n_type & N_EXT) {
741972a253aSDimitry Andric     bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden;
74206c3fb27SDimitry Andric     return symtab->addDefined(name, file, nullptr, sym.n_value, /*size=*/0,
74306c3fb27SDimitry Andric                               /*isWeakDef=*/false, isPrivateExtern,
74406c3fb27SDimitry Andric                               /*isReferencedDynamically=*/false,
74506c3fb27SDimitry Andric                               sym.n_desc & N_NO_DEAD_STRIP,
746349cc55cSDimitry Andric                               /*isWeakDefCanBeHidden=*/false);
747e8d8bef9SDimitry Andric   }
748fe6060f1SDimitry Andric   return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
749fe6060f1SDimitry Andric                        /*isWeakDef=*/false,
750fe6060f1SDimitry Andric                        /*isExternal=*/false, /*isPrivateExtern=*/false,
75106c3fb27SDimitry Andric                        /*includeInSymtab=*/true,
752fe6060f1SDimitry Andric                        /*isReferencedDynamically=*/false,
753fe6060f1SDimitry Andric                        sym.n_desc & N_NO_DEAD_STRIP);
754e8d8bef9SDimitry Andric }
755e8d8bef9SDimitry Andric 
756fe6060f1SDimitry Andric template <class NList>
parseNonSectionSymbol(const NList & sym,const char * strtab)757fe6060f1SDimitry Andric macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
758bdd1243dSDimitry Andric                                               const char *strtab) {
759bdd1243dSDimitry Andric   StringRef name = StringRef(strtab + sym.n_strx);
760e8d8bef9SDimitry Andric   uint8_t type = sym.n_type & N_TYPE;
761972a253aSDimitry Andric   bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden;
762e8d8bef9SDimitry Andric   switch (type) {
763e8d8bef9SDimitry Andric   case N_UNDF:
764e8d8bef9SDimitry Andric     return sym.n_value == 0
765fe6060f1SDimitry Andric                ? symtab->addUndefined(name, this, sym.n_desc & N_WEAK_REF)
766e8d8bef9SDimitry Andric                : symtab->addCommon(name, this, sym.n_value,
767e8d8bef9SDimitry Andric                                    1 << GET_COMM_ALIGN(sym.n_desc),
768972a253aSDimitry Andric                                    isPrivateExtern);
769e8d8bef9SDimitry Andric   case N_ABS:
770972a253aSDimitry Andric     return createAbsolute(sym, this, name, forceHidden);
771bdd1243dSDimitry Andric   case N_INDR: {
772bdd1243dSDimitry Andric     // Not much point in making local aliases -- relocs in the current file can
773bdd1243dSDimitry Andric     // just refer to the actual symbol itself. ld64 ignores these symbols too.
774bdd1243dSDimitry Andric     if (!(sym.n_type & N_EXT))
775bdd1243dSDimitry Andric       return nullptr;
776bdd1243dSDimitry Andric     StringRef aliasedName = StringRef(strtab + sym.n_value);
777bdd1243dSDimitry Andric     // isPrivateExtern is the only symbol flag that has an impact on the final
778bdd1243dSDimitry Andric     // aliased symbol.
77906c3fb27SDimitry Andric     auto *alias = make<AliasSymbol>(this, name, aliasedName, isPrivateExtern);
780bdd1243dSDimitry Andric     aliases.push_back(alias);
781bdd1243dSDimitry Andric     return alias;
782bdd1243dSDimitry Andric   }
783e8d8bef9SDimitry Andric   case N_PBUD:
784bdd1243dSDimitry Andric     error("TODO: support symbols of type N_PBUD");
785e8d8bef9SDimitry Andric     return nullptr;
786e8d8bef9SDimitry Andric   case N_SECT:
787e8d8bef9SDimitry Andric     llvm_unreachable(
788e8d8bef9SDimitry Andric         "N_SECT symbols should not be passed to parseNonSectionSymbol");
789e8d8bef9SDimitry Andric   default:
790e8d8bef9SDimitry Andric     llvm_unreachable("invalid symbol type");
791e8d8bef9SDimitry Andric   }
792e8d8bef9SDimitry Andric }
793e8d8bef9SDimitry Andric 
isUndef(const NList & sym)794349cc55cSDimitry Andric template <class NList> static bool isUndef(const NList &sym) {
795fe6060f1SDimitry Andric   return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == 0;
796fe6060f1SDimitry Andric }
797fe6060f1SDimitry Andric 
798fe6060f1SDimitry Andric template <class LP>
parseSymbols(ArrayRef<typename LP::section> sectionHeaders,ArrayRef<typename LP::nlist> nList,const char * strtab,bool subsectionsViaSymbols)799fe6060f1SDimitry Andric void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
800fe6060f1SDimitry Andric                            ArrayRef<typename LP::nlist> nList,
8015ffd83dbSDimitry Andric                            const char *strtab, bool subsectionsViaSymbols) {
802fe6060f1SDimitry Andric   using NList = typename LP::nlist;
803fe6060f1SDimitry Andric 
804fe6060f1SDimitry Andric   // Groups indices of the symbols by the sections that contain them.
805349cc55cSDimitry Andric   std::vector<std::vector<uint32_t>> symbolsBySection(sections.size());
8065ffd83dbSDimitry Andric   symbols.resize(nList.size());
807fe6060f1SDimitry Andric   SmallVector<unsigned, 32> undefineds;
808fe6060f1SDimitry Andric   for (uint32_t i = 0; i < nList.size(); ++i) {
809fe6060f1SDimitry Andric     const NList &sym = nList[i];
8105ffd83dbSDimitry Andric 
811fe6060f1SDimitry Andric     // Ignore debug symbols for now.
812fe6060f1SDimitry Andric     // FIXME: may need special handling.
813fe6060f1SDimitry Andric     if (sym.n_type & N_STAB)
814fe6060f1SDimitry Andric       continue;
815fe6060f1SDimitry Andric 
816fe6060f1SDimitry Andric     if ((sym.n_type & N_TYPE) == N_SECT) {
81781ad6265SDimitry Andric       Subsections &subsections = sections[sym.n_sect - 1]->subsections;
818fe6060f1SDimitry Andric       // parseSections() may have chosen not to parse this section.
819349cc55cSDimitry Andric       if (subsections.empty())
820fe6060f1SDimitry Andric         continue;
821fe6060f1SDimitry Andric       symbolsBySection[sym.n_sect - 1].push_back(i);
822fe6060f1SDimitry Andric     } else if (isUndef(sym)) {
823fe6060f1SDimitry Andric       undefineds.push_back(i);
824fe6060f1SDimitry Andric     } else {
825bdd1243dSDimitry Andric       symbols[i] = parseNonSectionSymbol(sym, strtab);
826fe6060f1SDimitry Andric     }
827fe6060f1SDimitry Andric   }
8285ffd83dbSDimitry Andric 
829349cc55cSDimitry Andric   for (size_t i = 0; i < sections.size(); ++i) {
83081ad6265SDimitry Andric     Subsections &subsections = sections[i]->subsections;
831349cc55cSDimitry Andric     if (subsections.empty())
832fe6060f1SDimitry Andric       continue;
833fe6060f1SDimitry Andric     std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
834fe6060f1SDimitry Andric     uint64_t sectionAddr = sectionHeaders[i].addr;
835fe6060f1SDimitry Andric     uint32_t sectionAlign = 1u << sectionHeaders[i].align;
836fe6060f1SDimitry Andric 
83781ad6265SDimitry Andric     // Some sections have already been split into subsections during
838fe6060f1SDimitry Andric     // parseSections(), so we simply need to match Symbols to the corresponding
839fe6060f1SDimitry Andric     // subsection here.
84081ad6265SDimitry Andric     if (sections[i]->doneSplitting) {
841fe6060f1SDimitry Andric       for (size_t j = 0; j < symbolIndices.size(); ++j) {
842bdd1243dSDimitry Andric         const uint32_t symIndex = symbolIndices[j];
843fe6060f1SDimitry Andric         const NList &sym = nList[symIndex];
844fe6060f1SDimitry Andric         StringRef name = strtab + sym.n_strx;
845fe6060f1SDimitry Andric         uint64_t symbolOffset = sym.n_value - sectionAddr;
846349cc55cSDimitry Andric         InputSection *isec =
84781ad6265SDimitry Andric             findContainingSubsection(*sections[i], &symbolOffset);
848fe6060f1SDimitry Andric         if (symbolOffset != 0) {
84981ad6265SDimitry Andric           error(toString(*sections[i]) + ":  symbol " + name +
850fe6060f1SDimitry Andric                 " at misaligned offset");
851fe6060f1SDimitry Andric           continue;
852fe6060f1SDimitry Andric         }
853972a253aSDimitry Andric         symbols[symIndex] =
854972a253aSDimitry Andric             createDefined(sym, name, isec, 0, isec->getSize(), forceHidden);
855fe6060f1SDimitry Andric       }
8565ffd83dbSDimitry Andric       continue;
8575ffd83dbSDimitry Andric     }
85881ad6265SDimitry Andric     sections[i]->doneSplitting = true;
8595ffd83dbSDimitry Andric 
860bdd1243dSDimitry Andric     auto getSymName = [strtab](const NList& sym) -> StringRef {
861bdd1243dSDimitry Andric       return StringRef(strtab + sym.n_strx);
862bdd1243dSDimitry Andric     };
863bdd1243dSDimitry Andric 
864fe6060f1SDimitry Andric     // Calculate symbol sizes and create subsections by splitting the sections
865fe6060f1SDimitry Andric     // along symbol boundaries.
866349cc55cSDimitry Andric     // We populate subsections by repeatedly splitting the last (highest
867349cc55cSDimitry Andric     // address) subsection.
868fe6060f1SDimitry Andric     llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
86906c3fb27SDimitry Andric       // Put extern weak symbols after other symbols at the same address so
87006c3fb27SDimitry Andric       // that weak symbol coalescing works correctly. See
87106c3fb27SDimitry Andric       // SymbolTable::addDefined() for details.
87206c3fb27SDimitry Andric       if (nList[lhs].n_value == nList[rhs].n_value &&
87306c3fb27SDimitry Andric           nList[lhs].n_type & N_EXT && nList[rhs].n_type & N_EXT)
87406c3fb27SDimitry Andric         return !(nList[lhs].n_desc & N_WEAK_DEF) && (nList[rhs].n_desc & N_WEAK_DEF);
875fe6060f1SDimitry Andric       return nList[lhs].n_value < nList[rhs].n_value;
876fe6060f1SDimitry Andric     });
877fe6060f1SDimitry Andric     for (size_t j = 0; j < symbolIndices.size(); ++j) {
878bdd1243dSDimitry Andric       const uint32_t symIndex = symbolIndices[j];
879fe6060f1SDimitry Andric       const NList &sym = nList[symIndex];
880bdd1243dSDimitry Andric       StringRef name = getSymName(sym);
881349cc55cSDimitry Andric       Subsection &subsec = subsections.back();
882349cc55cSDimitry Andric       InputSection *isec = subsec.isec;
883fe6060f1SDimitry Andric 
884349cc55cSDimitry Andric       uint64_t subsecAddr = sectionAddr + subsec.offset;
885fe6060f1SDimitry Andric       size_t symbolOffset = sym.n_value - subsecAddr;
886fe6060f1SDimitry Andric       uint64_t symbolSize =
887fe6060f1SDimitry Andric           j + 1 < symbolIndices.size()
888fe6060f1SDimitry Andric               ? nList[symbolIndices[j + 1]].n_value - sym.n_value
889fe6060f1SDimitry Andric               : isec->data.size() - symbolOffset;
890fe6060f1SDimitry Andric       // There are 4 cases where we do not need to create a new subsection:
891fe6060f1SDimitry Andric       //   1. If the input file does not use subsections-via-symbols.
892fe6060f1SDimitry Andric       //   2. Multiple symbols at the same address only induce one subsection.
893fe6060f1SDimitry Andric       //      (The symbolOffset == 0 check covers both this case as well as
894fe6060f1SDimitry Andric       //      the first loop iteration.)
895fe6060f1SDimitry Andric       //   3. Alternative entry points do not induce new subsections.
896fe6060f1SDimitry Andric       //   4. If we have a literal section (e.g. __cstring and __literal4).
897fe6060f1SDimitry Andric       if (!subsectionsViaSymbols || symbolOffset == 0 ||
898fe6060f1SDimitry Andric           sym.n_desc & N_ALT_ENTRY || !isa<ConcatInputSection>(isec)) {
899bdd1243dSDimitry Andric         isec->hasAltEntry = symbolOffset != 0;
900972a253aSDimitry Andric         symbols[symIndex] = createDefined(sym, name, isec, symbolOffset,
901972a253aSDimitry Andric                                           symbolSize, forceHidden);
9025ffd83dbSDimitry Andric         continue;
9035ffd83dbSDimitry Andric       }
904fe6060f1SDimitry Andric       auto *concatIsec = cast<ConcatInputSection>(isec);
9055ffd83dbSDimitry Andric 
906fe6060f1SDimitry Andric       auto *nextIsec = make<ConcatInputSection>(*concatIsec);
907fe6060f1SDimitry Andric       nextIsec->wasCoalesced = false;
908fe6060f1SDimitry Andric       if (isZeroFill(isec->getFlags())) {
909fe6060f1SDimitry Andric         // Zero-fill sections have NULL data.data() non-zero data.size()
910fe6060f1SDimitry Andric         nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
911fe6060f1SDimitry Andric         isec->data = {nullptr, symbolOffset};
912fe6060f1SDimitry Andric       } else {
913fe6060f1SDimitry Andric         nextIsec->data = isec->data.slice(symbolOffset);
914fe6060f1SDimitry Andric         isec->data = isec->data.slice(0, symbolOffset);
9155ffd83dbSDimitry Andric       }
9165ffd83dbSDimitry Andric 
917fe6060f1SDimitry Andric       // By construction, the symbol will be at offset zero in the new
918fe6060f1SDimitry Andric       // subsection.
919972a253aSDimitry Andric       symbols[symIndex] = createDefined(sym, name, nextIsec, /*value=*/0,
920972a253aSDimitry Andric                                         symbolSize, forceHidden);
9215ffd83dbSDimitry Andric       // TODO: ld64 appears to preserve the original alignment as well as each
9225ffd83dbSDimitry Andric       // subsection's offset from the last aligned address. We should consider
9235ffd83dbSDimitry Andric       // emulating that behavior.
924fe6060f1SDimitry Andric       nextIsec->align = MinAlign(sectionAlign, sym.n_value);
925349cc55cSDimitry Andric       subsections.push_back({sym.n_value - sectionAddr, nextIsec});
926fe6060f1SDimitry Andric     }
9275ffd83dbSDimitry Andric   }
9285ffd83dbSDimitry Andric 
929fe6060f1SDimitry Andric   // Undefined symbols can trigger recursive fetch from Archives due to
930fe6060f1SDimitry Andric   // LazySymbols. Process defined symbols first so that the relative order
931fe6060f1SDimitry Andric   // between a defined symbol and an undefined symbol does not change the
932fe6060f1SDimitry Andric   // symbol resolution behavior. In addition, a set of interconnected symbols
933fe6060f1SDimitry Andric   // will all be resolved to the same file, instead of being resolved to
934fe6060f1SDimitry Andric   // different files.
935bdd1243dSDimitry Andric   for (unsigned i : undefineds)
936bdd1243dSDimitry Andric     symbols[i] = parseNonSectionSymbol(nList[i], strtab);
9375ffd83dbSDimitry Andric }
9385ffd83dbSDimitry Andric 
OpaqueFile(MemoryBufferRef mb,StringRef segName,StringRef sectName)939e8d8bef9SDimitry Andric OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
940e8d8bef9SDimitry Andric                        StringRef sectName)
941e8d8bef9SDimitry Andric     : InputFile(OpaqueKind, mb) {
942e8d8bef9SDimitry Andric   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
943fe6060f1SDimitry Andric   ArrayRef<uint8_t> data = {buf, mb.getBufferSize()};
94481ad6265SDimitry Andric   sections.push_back(make<Section>(/*file=*/this, segName.take_front(16),
94581ad6265SDimitry Andric                                    sectName.take_front(16),
94681ad6265SDimitry Andric                                    /*flags=*/0, /*addr=*/0));
94781ad6265SDimitry Andric   Section &section = *sections.back();
94881ad6265SDimitry Andric   ConcatInputSection *isec = make<ConcatInputSection>(section, data);
949fe6060f1SDimitry Andric   isec->live = true;
95081ad6265SDimitry Andric   section.subsections.push_back({0, isec});
951e8d8bef9SDimitry Andric }
952e8d8bef9SDimitry Andric 
953*5f757f3fSDimitry Andric template <class LP>
parseLinkerOptions(SmallVectorImpl<StringRef> & LCLinkerOptions)954*5f757f3fSDimitry Andric void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) {
955*5f757f3fSDimitry Andric   using Header = typename LP::mach_header;
956*5f757f3fSDimitry Andric   auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
957*5f757f3fSDimitry Andric 
958*5f757f3fSDimitry Andric   for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
959*5f757f3fSDimitry Andric     StringRef data{reinterpret_cast<const char *>(cmd + 1),
960*5f757f3fSDimitry Andric                    cmd->cmdsize - sizeof(linker_option_command)};
961*5f757f3fSDimitry Andric     parseLCLinkerOption(LCLinkerOptions, this, cmd->count, data);
962*5f757f3fSDimitry Andric   }
963*5f757f3fSDimitry Andric }
964*5f757f3fSDimitry Andric 
// LC linker options collected by ObjFile::parse() from each parsed object
// file; parse() only appends to this list and does not resolve the options.
965*5f757f3fSDimitry Andric SmallVector<StringRef> macho::unprocessedLCLinkerOptions;
ObjFile(MemoryBufferRef mb,uint32_t modTime,StringRef archiveName,bool lazy,bool forceHidden,bool compatArch,bool builtFromBitcode)96604eeddc0SDimitry Andric ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
967*5f757f3fSDimitry Andric                  bool lazy, bool forceHidden, bool compatArch,
968*5f757f3fSDimitry Andric                  bool builtFromBitcode)
969*5f757f3fSDimitry Andric     : InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden),
970*5f757f3fSDimitry Andric       builtFromBitcode(builtFromBitcode) {
971e8d8bef9SDimitry Andric   this->archiveName = std::string(archiveName);
972*5f757f3fSDimitry Andric   this->compatArch = compatArch;
97304eeddc0SDimitry Andric   if (lazy) {
97404eeddc0SDimitry Andric     if (target->wordSize == 8)
97504eeddc0SDimitry Andric       parseLazy<LP64>();
97604eeddc0SDimitry Andric     else
97704eeddc0SDimitry Andric       parseLazy<ILP32>();
97804eeddc0SDimitry Andric   } else {
979fe6060f1SDimitry Andric     if (target->wordSize == 8)
980fe6060f1SDimitry Andric       parse<LP64>();
981fe6060f1SDimitry Andric     else
982fe6060f1SDimitry Andric       parse<ILP32>();
983e8d8bef9SDimitry Andric   }
98404eeddc0SDimitry Andric }
985e8d8bef9SDimitry Andric 
parse()986fe6060f1SDimitry Andric template <class LP> void ObjFile::parse() {
987fe6060f1SDimitry Andric   using Header = typename LP::mach_header;
988fe6060f1SDimitry Andric   using SegmentCommand = typename LP::segment_command;
989349cc55cSDimitry Andric   using SectionHeader = typename LP::section;
990fe6060f1SDimitry Andric   using NList = typename LP::nlist;
991fe6060f1SDimitry Andric 
992fe6060f1SDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
993fe6060f1SDimitry Andric   auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
994fe6060f1SDimitry Andric 
995*5f757f3fSDimitry Andric   // If we've already checked the arch, then don't need to check again.
996*5f757f3fSDimitry Andric   if (!compatArch)
997fe6060f1SDimitry Andric     return;
998*5f757f3fSDimitry Andric   if (!(compatArch = compatWithTargetArch(this, hdr)))
999fe6060f1SDimitry Andric     return;
1000fe6060f1SDimitry Andric 
1001*5f757f3fSDimitry Andric   // We will resolve LC linker options once all native objects are loaded after
1002*5f757f3fSDimitry Andric   // LTO is finished.
1003*5f757f3fSDimitry Andric   SmallVector<StringRef, 4> LCLinkerOptions;
1004*5f757f3fSDimitry Andric   parseLinkerOptions<LP>(LCLinkerOptions);
1005*5f757f3fSDimitry Andric   unprocessedLCLinkerOptions.append(LCLinkerOptions);
1006fe6060f1SDimitry Andric 
1007349cc55cSDimitry Andric   ArrayRef<SectionHeader> sectionHeaders;
1008fe6060f1SDimitry Andric   if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
1009fe6060f1SDimitry Andric     auto *c = reinterpret_cast<const SegmentCommand *>(cmd);
1010349cc55cSDimitry Andric     sectionHeaders = ArrayRef<SectionHeader>{
1011349cc55cSDimitry Andric         reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
10125ffd83dbSDimitry Andric     parseSections(sectionHeaders);
10135ffd83dbSDimitry Andric   }
10145ffd83dbSDimitry Andric 
10155ffd83dbSDimitry Andric   // TODO: Error on missing LC_SYMTAB?
10165ffd83dbSDimitry Andric   if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
10175ffd83dbSDimitry Andric     auto *c = reinterpret_cast<const symtab_command *>(cmd);
1018fe6060f1SDimitry Andric     ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
1019fe6060f1SDimitry Andric                           c->nsyms);
10205ffd83dbSDimitry Andric     const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
10215ffd83dbSDimitry Andric     bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
1022fe6060f1SDimitry Andric     parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
10235ffd83dbSDimitry Andric   }
10245ffd83dbSDimitry Andric 
10255ffd83dbSDimitry Andric   // The relocations may refer to the symbols, so we parse them after we have
10265ffd83dbSDimitry Andric   // parsed all the symbols.
1027349cc55cSDimitry Andric   for (size_t i = 0, n = sections.size(); i < n; ++i)
102881ad6265SDimitry Andric     if (!sections[i]->subsections.empty())
102981ad6265SDimitry Andric       parseRelocations(sectionHeaders, sectionHeaders[i], *sections[i]);
103081ad6265SDimitry Andric 
1031e8d8bef9SDimitry Andric   parseDebugInfo();
103281ad6265SDimitry Andric 
103381ad6265SDimitry Andric   Section *ehFrameSection = nullptr;
103481ad6265SDimitry Andric   Section *compactUnwindSection = nullptr;
103581ad6265SDimitry Andric   for (Section *sec : sections) {
103681ad6265SDimitry Andric     Section **s = StringSwitch<Section **>(sec->name)
103781ad6265SDimitry Andric                       .Case(section_names::compactUnwind, &compactUnwindSection)
103881ad6265SDimitry Andric                       .Case(section_names::ehFrame, &ehFrameSection)
103981ad6265SDimitry Andric                       .Default(nullptr);
104081ad6265SDimitry Andric     if (s)
104181ad6265SDimitry Andric       *s = sec;
104281ad6265SDimitry Andric   }
1043349cc55cSDimitry Andric   if (compactUnwindSection)
104481ad6265SDimitry Andric     registerCompactUnwind(*compactUnwindSection);
1045753f127fSDimitry Andric   if (ehFrameSection)
104681ad6265SDimitry Andric     registerEhFrames(*ehFrameSection);
1047e8d8bef9SDimitry Andric }
1048e8d8bef9SDimitry Andric 
parseLazy()104904eeddc0SDimitry Andric template <class LP> void ObjFile::parseLazy() {
105004eeddc0SDimitry Andric   using Header = typename LP::mach_header;
105104eeddc0SDimitry Andric   using NList = typename LP::nlist;
105204eeddc0SDimitry Andric 
105304eeddc0SDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
105404eeddc0SDimitry Andric   auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
1055*5f757f3fSDimitry Andric 
1056*5f757f3fSDimitry Andric   if (!compatArch)
1057*5f757f3fSDimitry Andric     return;
1058*5f757f3fSDimitry Andric   if (!(compatArch = compatWithTargetArch(this, hdr)))
1059*5f757f3fSDimitry Andric     return;
1060*5f757f3fSDimitry Andric 
106104eeddc0SDimitry Andric   const load_command *cmd = findCommand(hdr, LC_SYMTAB);
106204eeddc0SDimitry Andric   if (!cmd)
106304eeddc0SDimitry Andric     return;
106404eeddc0SDimitry Andric   auto *c = reinterpret_cast<const symtab_command *>(cmd);
106504eeddc0SDimitry Andric   ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
106604eeddc0SDimitry Andric                         c->nsyms);
106704eeddc0SDimitry Andric   const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
106804eeddc0SDimitry Andric   symbols.resize(nList.size());
1069bdd1243dSDimitry Andric   for (const auto &[i, sym] : llvm::enumerate(nList)) {
107004eeddc0SDimitry Andric     if ((sym.n_type & N_EXT) && !isUndef(sym)) {
107104eeddc0SDimitry Andric       // TODO: Bound checking
107204eeddc0SDimitry Andric       StringRef name = strtab + sym.n_strx;
1073bdd1243dSDimitry Andric       symbols[i] = symtab->addLazyObject(name, *this);
107404eeddc0SDimitry Andric       if (!lazy)
107504eeddc0SDimitry Andric         break;
107604eeddc0SDimitry Andric     }
107704eeddc0SDimitry Andric   }
107804eeddc0SDimitry Andric }
107904eeddc0SDimitry Andric 
parseDebugInfo()1080e8d8bef9SDimitry Andric void ObjFile::parseDebugInfo() {
1081e8d8bef9SDimitry Andric   std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
1082e8d8bef9SDimitry Andric   if (!dObj)
1083e8d8bef9SDimitry Andric     return;
1084e8d8bef9SDimitry Andric 
108581ad6265SDimitry Andric   // We do not re-use the context from getDwarf() here as that function
108681ad6265SDimitry Andric   // constructs an expensive DWARFCache object.
1087e8d8bef9SDimitry Andric   auto *ctx = make<DWARFContext>(
1088e8d8bef9SDimitry Andric       std::move(dObj), "",
1089e8d8bef9SDimitry Andric       [&](Error err) {
1090e8d8bef9SDimitry Andric         warn(toString(this) + ": " + toString(std::move(err)));
1091e8d8bef9SDimitry Andric       },
1092e8d8bef9SDimitry Andric       [&](Error warning) {
1093e8d8bef9SDimitry Andric         warn(toString(this) + ": " + toString(std::move(warning)));
1094e8d8bef9SDimitry Andric       });
1095e8d8bef9SDimitry Andric 
1096e8d8bef9SDimitry Andric   // TODO: Since object files can contain a lot of DWARF info, we should verify
1097e8d8bef9SDimitry Andric   // that we are parsing just the info we need
1098e8d8bef9SDimitry Andric   const DWARFContext::compile_unit_range &units = ctx->compile_units();
1099fe6060f1SDimitry Andric   // FIXME: There can be more than one compile unit per object file. See
1100fe6060f1SDimitry Andric   // PR48637.
1101e8d8bef9SDimitry Andric   auto it = units.begin();
110281ad6265SDimitry Andric   compileUnit = it != units.end() ? it->get() : nullptr;
1103fe6060f1SDimitry Andric }
1104fe6060f1SDimitry Andric 
getDataInCode() const11050eae32dcSDimitry Andric ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
1106fe6060f1SDimitry Andric   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
1107fe6060f1SDimitry Andric   const load_command *cmd = findCommand(buf, LC_DATA_IN_CODE);
1108fe6060f1SDimitry Andric   if (!cmd)
11090eae32dcSDimitry Andric     return {};
1110fe6060f1SDimitry Andric   const auto *c = reinterpret_cast<const linkedit_data_command *>(cmd);
11110eae32dcSDimitry Andric   return {reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
1112fe6060f1SDimitry Andric           c->datasize / sizeof(data_in_code_entry)};
1113e8d8bef9SDimitry Andric }
1114e8d8bef9SDimitry Andric 
getOptimizationHints() const1115bdd1243dSDimitry Andric ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
1116bdd1243dSDimitry Andric   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
1117bdd1243dSDimitry Andric   if (auto *cmd =
1118bdd1243dSDimitry Andric           findCommand<linkedit_data_command>(buf, LC_LINKER_OPTIMIZATION_HINT))
1119bdd1243dSDimitry Andric     return {buf + cmd->dataoff, cmd->datasize};
1120bdd1243dSDimitry Andric   return {};
1121bdd1243dSDimitry Andric }
1122bdd1243dSDimitry Andric 
1123349cc55cSDimitry Andric // Create pointers from symbols to their associated compact unwind entries.
registerCompactUnwind(Section & compactUnwindSection)112481ad6265SDimitry Andric void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
112581ad6265SDimitry Andric   for (const Subsection &subsection : compactUnwindSection.subsections) {
1126349cc55cSDimitry Andric     ConcatInputSection *isec = cast<ConcatInputSection>(subsection.isec);
1127fcaf7f86SDimitry Andric     // Hack!! Each compact unwind entry (CUE) has its UNSIGNED relocations embed
1128fcaf7f86SDimitry Andric     // their addends in its data. Thus if ICF operated naively and compared the
1129fcaf7f86SDimitry Andric     // entire contents of each CUE, entries with identical unwind info but e.g.
1130fcaf7f86SDimitry Andric     // belonging to different functions would never be considered equivalent. To
1131fcaf7f86SDimitry Andric     // work around this problem, we remove some parts of the data containing the
1132fcaf7f86SDimitry Andric     // embedded addends. In particular, we remove the function address and LSDA
1133fcaf7f86SDimitry Andric     // pointers.  Since these locations are at the start and end of the entry,
1134fcaf7f86SDimitry Andric     // we can do this using a simple, efficient slice rather than performing a
1135fcaf7f86SDimitry Andric     // copy.  We are not losing any information here because the embedded
1136fcaf7f86SDimitry Andric     // addends have already been parsed in the corresponding Reloc structs.
1137fcaf7f86SDimitry Andric     //
1138fcaf7f86SDimitry Andric     // Removing these pointers would not be safe if they were pointers to
1139fcaf7f86SDimitry Andric     // absolute symbols. In that case, there would be no corresponding
1140fcaf7f86SDimitry Andric     // relocation. However, (AFAIK) MC cannot emit references to absolute
1141fcaf7f86SDimitry Andric     // symbols for either the function address or the LSDA. However, it *can* do
1142fcaf7f86SDimitry Andric     // so for the personality pointer, so we are not slicing that field away.
1143fcaf7f86SDimitry Andric     //
1144fcaf7f86SDimitry Andric     // Note that we do not adjust the offsets of the corresponding relocations;
1145fcaf7f86SDimitry Andric     // instead, we rely on `relocateCompactUnwind()` to correctly handle these
1146fcaf7f86SDimitry Andric     // truncated input sections.
1147fcaf7f86SDimitry Andric     isec->data = isec->data.slice(target->wordSize, 8 + target->wordSize);
114881ad6265SDimitry Andric     uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t));
114981ad6265SDimitry Andric     // llvm-mc omits CU entries for functions that need DWARF encoding, but
115081ad6265SDimitry Andric     // `ld -r` doesn't. We can ignore them because we will re-synthesize these
115181ad6265SDimitry Andric     // CU entries from the DWARF info during the output phase.
1152bdd1243dSDimitry Andric     if ((encoding & static_cast<uint32_t>(UNWIND_MODE_MASK)) ==
1153bdd1243dSDimitry Andric         target->modeDwarfEncoding)
115481ad6265SDimitry Andric       continue;
1155349cc55cSDimitry Andric 
1156349cc55cSDimitry Andric     ConcatInputSection *referentIsec;
1157349cc55cSDimitry Andric     for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
1158349cc55cSDimitry Andric       Reloc &r = *it;
1159349cc55cSDimitry Andric       // CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
1160349cc55cSDimitry Andric       if (r.offset != 0) {
1161349cc55cSDimitry Andric         ++it;
1162349cc55cSDimitry Andric         continue;
1163349cc55cSDimitry Andric       }
1164349cc55cSDimitry Andric       uint64_t add = r.addend;
1165349cc55cSDimitry Andric       if (auto *sym = cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>())) {
1166349cc55cSDimitry Andric         // Check whether the symbol defined in this file is the prevailing one.
1167349cc55cSDimitry Andric         // Skip if it is e.g. a weak def that didn't prevail.
1168349cc55cSDimitry Andric         if (sym->getFile() != this) {
1169349cc55cSDimitry Andric           ++it;
1170349cc55cSDimitry Andric           continue;
1171349cc55cSDimitry Andric         }
1172349cc55cSDimitry Andric         add += sym->value;
1173349cc55cSDimitry Andric         referentIsec = cast<ConcatInputSection>(sym->isec);
1174349cc55cSDimitry Andric       } else {
1175349cc55cSDimitry Andric         referentIsec =
1176349cc55cSDimitry Andric             cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
1177349cc55cSDimitry Andric       }
117881ad6265SDimitry Andric       // Unwind info lives in __DATA, and finalization of __TEXT will occur
117981ad6265SDimitry Andric       // before finalization of __DATA. Moreover, the finalization of unwind
118081ad6265SDimitry Andric       // info depends on the exact addresses that it references. So it is safe
118181ad6265SDimitry Andric       // for compact unwind to reference addresses in __TEXT, but not addresses
118281ad6265SDimitry Andric       // in any other segment.
1183349cc55cSDimitry Andric       if (referentIsec->getSegName() != segment_names::text)
118481ad6265SDimitry Andric         error(isec->getLocation(r.offset) + " references section " +
118581ad6265SDimitry Andric               referentIsec->getName() + " which is not in segment __TEXT");
1186349cc55cSDimitry Andric       // The functionAddress relocations are typically section relocations.
1187349cc55cSDimitry Andric       // However, unwind info operates on a per-symbol basis, so we search for
1188349cc55cSDimitry Andric       // the function symbol here.
118981ad6265SDimitry Andric       Defined *d = findSymbolAtOffset(referentIsec, add);
119081ad6265SDimitry Andric       if (!d) {
1191349cc55cSDimitry Andric         ++it;
1192349cc55cSDimitry Andric         continue;
1193349cc55cSDimitry Andric       }
119481ad6265SDimitry Andric       d->unwindEntry = isec;
1195fcaf7f86SDimitry Andric       // Now that the symbol points to the unwind entry, we can remove the reloc
1196fcaf7f86SDimitry Andric       // that points from the unwind entry back to the symbol.
1197fcaf7f86SDimitry Andric       //
1198fcaf7f86SDimitry Andric       // First, the symbol keeps the unwind entry alive (and not vice versa), so
1199fcaf7f86SDimitry Andric       // this keeps dead-stripping simple.
1200fcaf7f86SDimitry Andric       //
1201fcaf7f86SDimitry Andric       // Moreover, it reduces the work that ICF needs to do to figure out if
1202fcaf7f86SDimitry Andric       // functions with unwind info are foldable.
1203fcaf7f86SDimitry Andric       //
1204fcaf7f86SDimitry Andric       // However, this does make it possible for ICF to fold CUEs that point to
1205fcaf7f86SDimitry Andric       // distinct functions (if the CUEs are otherwise identical).
1206fcaf7f86SDimitry Andric       // UnwindInfoSection takes care of this by re-duplicating the CUEs so that
1207fcaf7f86SDimitry Andric       // each one can hold a distinct functionAddress value.
1208fcaf7f86SDimitry Andric       //
1209fcaf7f86SDimitry Andric       // Given that clang emits relocations in reverse order of address, this
1210fcaf7f86SDimitry Andric       // relocation should be at the end of the vector for most of our input
1211fcaf7f86SDimitry Andric       // object files, so this erase() is typically an O(1) operation.
1212349cc55cSDimitry Andric       it = isec->relocs.erase(it);
1213349cc55cSDimitry Andric     }
1214349cc55cSDimitry Andric   }
1215349cc55cSDimitry Andric }
1216349cc55cSDimitry Andric 
121781ad6265SDimitry Andric struct CIE {
121881ad6265SDimitry Andric   macho::Symbol *personalitySymbol = nullptr;
121981ad6265SDimitry Andric   bool fdesHaveAug = false;
122061cfbce3SDimitry Andric   uint8_t lsdaPtrSize = 0; // 0 => no LSDA
122161cfbce3SDimitry Andric   uint8_t funcPtrSize = 0;
122281ad6265SDimitry Andric };
122381ad6265SDimitry Andric 
pointerEncodingToSize(uint8_t enc)122461cfbce3SDimitry Andric static uint8_t pointerEncodingToSize(uint8_t enc) {
122561cfbce3SDimitry Andric   switch (enc & 0xf) {
122661cfbce3SDimitry Andric   case dwarf::DW_EH_PE_absptr:
122761cfbce3SDimitry Andric     return target->wordSize;
122861cfbce3SDimitry Andric   case dwarf::DW_EH_PE_sdata4:
122961cfbce3SDimitry Andric     return 4;
123061cfbce3SDimitry Andric   case dwarf::DW_EH_PE_sdata8:
123161cfbce3SDimitry Andric     // ld64 doesn't actually support sdata8, but this seems simple enough...
123261cfbce3SDimitry Andric     return 8;
123361cfbce3SDimitry Andric   default:
123461cfbce3SDimitry Andric     return 0;
123561cfbce3SDimitry Andric   };
123661cfbce3SDimitry Andric }
123761cfbce3SDimitry Andric 
parseCIE(const InputSection * isec,const EhReader & reader,size_t off)123881ad6265SDimitry Andric static CIE parseCIE(const InputSection *isec, const EhReader &reader,
123981ad6265SDimitry Andric                     size_t off) {
124081ad6265SDimitry Andric   // Handling the full generality of possible DWARF encodings would be a major
124181ad6265SDimitry Andric   // pain. We instead take advantage of our knowledge of how llvm-mc encodes
124281ad6265SDimitry Andric   // DWARF and handle just that.
124381ad6265SDimitry Andric   constexpr uint8_t expectedPersonalityEnc =
124481ad6265SDimitry Andric       dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
124581ad6265SDimitry Andric 
124681ad6265SDimitry Andric   CIE cie;
124781ad6265SDimitry Andric   uint8_t version = reader.readByte(&off);
124881ad6265SDimitry Andric   if (version != 1 && version != 3)
124981ad6265SDimitry Andric     fatal("Expected CIE version of 1 or 3, got " + Twine(version));
125081ad6265SDimitry Andric   StringRef aug = reader.readString(&off);
125181ad6265SDimitry Andric   reader.skipLeb128(&off); // skip code alignment
125281ad6265SDimitry Andric   reader.skipLeb128(&off); // skip data alignment
125381ad6265SDimitry Andric   reader.skipLeb128(&off); // skip return address register
125481ad6265SDimitry Andric   reader.skipLeb128(&off); // skip aug data length
125581ad6265SDimitry Andric   uint64_t personalityAddrOff = 0;
125681ad6265SDimitry Andric   for (char c : aug) {
125781ad6265SDimitry Andric     switch (c) {
125881ad6265SDimitry Andric     case 'z':
125981ad6265SDimitry Andric       cie.fdesHaveAug = true;
126081ad6265SDimitry Andric       break;
126181ad6265SDimitry Andric     case 'P': {
126281ad6265SDimitry Andric       uint8_t personalityEnc = reader.readByte(&off);
126381ad6265SDimitry Andric       if (personalityEnc != expectedPersonalityEnc)
126481ad6265SDimitry Andric         reader.failOn(off, "unexpected personality encoding 0x" +
126581ad6265SDimitry Andric                                Twine::utohexstr(personalityEnc));
126681ad6265SDimitry Andric       personalityAddrOff = off;
126781ad6265SDimitry Andric       off += 4;
126881ad6265SDimitry Andric       break;
126981ad6265SDimitry Andric     }
127081ad6265SDimitry Andric     case 'L': {
127181ad6265SDimitry Andric       uint8_t lsdaEnc = reader.readByte(&off);
127261cfbce3SDimitry Andric       cie.lsdaPtrSize = pointerEncodingToSize(lsdaEnc);
127361cfbce3SDimitry Andric       if (cie.lsdaPtrSize == 0)
127481ad6265SDimitry Andric         reader.failOn(off, "unexpected LSDA encoding 0x" +
127581ad6265SDimitry Andric                                Twine::utohexstr(lsdaEnc));
127681ad6265SDimitry Andric       break;
127781ad6265SDimitry Andric     }
127881ad6265SDimitry Andric     case 'R': {
127981ad6265SDimitry Andric       uint8_t pointerEnc = reader.readByte(&off);
128061cfbce3SDimitry Andric       cie.funcPtrSize = pointerEncodingToSize(pointerEnc);
128161cfbce3SDimitry Andric       if (cie.funcPtrSize == 0 || !(pointerEnc & dwarf::DW_EH_PE_pcrel))
128281ad6265SDimitry Andric         reader.failOn(off, "unexpected pointer encoding 0x" +
128381ad6265SDimitry Andric                                Twine::utohexstr(pointerEnc));
128481ad6265SDimitry Andric       break;
128581ad6265SDimitry Andric     }
128681ad6265SDimitry Andric     default:
128781ad6265SDimitry Andric       break;
128881ad6265SDimitry Andric     }
128981ad6265SDimitry Andric   }
129081ad6265SDimitry Andric   if (personalityAddrOff != 0) {
129106c3fb27SDimitry Andric     const auto *personalityReloc = isec->getRelocAt(personalityAddrOff);
129206c3fb27SDimitry Andric     if (!personalityReloc)
129381ad6265SDimitry Andric       reader.failOn(off, "Failed to locate relocation for personality symbol");
129406c3fb27SDimitry Andric     cie.personalitySymbol = personalityReloc->referent.get<macho::Symbol *>();
129581ad6265SDimitry Andric   }
129681ad6265SDimitry Andric   return cie;
129781ad6265SDimitry Andric }
129881ad6265SDimitry Andric 
129981ad6265SDimitry Andric // EH frame target addresses may be encoded as pcrel offsets. However, instead
130081ad6265SDimitry Andric // of using an actual pcrel reloc, ld64 emits subtractor relocations instead.
130181ad6265SDimitry Andric // This function recovers the target address from the subtractors, essentially
130281ad6265SDimitry Andric // performing the inverse operation of EhRelocator.
130381ad6265SDimitry Andric //
130481ad6265SDimitry Andric // Concretely, we expect our relocations to write the value of `PC -
130581ad6265SDimitry Andric // target_addr` to `PC`. `PC` itself is denoted by a minuend relocation that
130681ad6265SDimitry Andric // points to a symbol plus an addend.
130781ad6265SDimitry Andric //
130881ad6265SDimitry Andric // It is important that the minuend relocation point to a symbol within the
130981ad6265SDimitry Andric // same section as the fixup value, since sections may get moved around.
131081ad6265SDimitry Andric //
131181ad6265SDimitry Andric // For example, for arm64, llvm-mc emits relocations for the target function
131281ad6265SDimitry Andric // address like so:
131381ad6265SDimitry Andric //
131481ad6265SDimitry Andric //   ltmp:
131581ad6265SDimitry Andric //     <CIE start>
131681ad6265SDimitry Andric //     ...
131781ad6265SDimitry Andric //     <CIE end>
131881ad6265SDimitry Andric //     ... multiple FDEs ...
131981ad6265SDimitry Andric //     <FDE start>
132081ad6265SDimitry Andric //     <target function address - (ltmp + pcrel offset)>
132181ad6265SDimitry Andric //     ...
132281ad6265SDimitry Andric //
132381ad6265SDimitry Andric // If any of the FDEs in `multiple FDEs` get dead-stripped, then `FDE start`
132481ad6265SDimitry Andric // will move to an earlier address, and `ltmp + pcrel offset` will no longer
132581ad6265SDimitry Andric // reflect an accurate pcrel value. To avoid this problem, we "canonicalize"
132681ad6265SDimitry Andric // our relocation by adding an `EH_Frame` symbol at `FDE start`, and updating
132781ad6265SDimitry Andric // the reloc to be `target function address - (EH_Frame + new pcrel offset)`.
132881ad6265SDimitry Andric //
132981ad6265SDimitry Andric // If `Invert` is set, then we instead expect `target_addr - PC` to be written
133081ad6265SDimitry Andric // to `PC`.
133181ad6265SDimitry Andric template <bool Invert = false>
133281ad6265SDimitry Andric Defined *
targetSymFromCanonicalSubtractor(const InputSection * isec,std::vector<macho::Reloc>::iterator relocIt)133381ad6265SDimitry Andric targetSymFromCanonicalSubtractor(const InputSection *isec,
133481ad6265SDimitry Andric                                  std::vector<macho::Reloc>::iterator relocIt) {
133581ad6265SDimitry Andric   macho::Reloc &subtrahend = *relocIt;
133681ad6265SDimitry Andric   macho::Reloc &minuend = *std::next(relocIt);
133781ad6265SDimitry Andric   assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND));
133881ad6265SDimitry Andric   assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED));
133981ad6265SDimitry Andric   // Note: pcSym may *not* be exactly at the PC; there's usually a non-zero
134081ad6265SDimitry Andric   // addend.
134181ad6265SDimitry Andric   auto *pcSym = cast<Defined>(subtrahend.referent.get<macho::Symbol *>());
134281ad6265SDimitry Andric   Defined *target =
134381ad6265SDimitry Andric       cast_or_null<Defined>(minuend.referent.dyn_cast<macho::Symbol *>());
134481ad6265SDimitry Andric   if (!pcSym) {
134581ad6265SDimitry Andric     auto *targetIsec =
134681ad6265SDimitry Andric         cast<ConcatInputSection>(minuend.referent.get<InputSection *>());
134781ad6265SDimitry Andric     target = findSymbolAtOffset(targetIsec, minuend.addend);
134881ad6265SDimitry Andric   }
134981ad6265SDimitry Andric   if (Invert)
135081ad6265SDimitry Andric     std::swap(pcSym, target);
135181ad6265SDimitry Andric   if (pcSym->isec == isec) {
135281ad6265SDimitry Andric     if (pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
135381ad6265SDimitry Andric       fatal("invalid FDE relocation in __eh_frame");
135481ad6265SDimitry Andric   } else {
135581ad6265SDimitry Andric     // Ensure the pcReloc points to a symbol within the current EH frame.
135681ad6265SDimitry Andric     // HACK: we should really verify that the original relocation's semantics
135781ad6265SDimitry Andric     // are preserved. In particular, we should have
135881ad6265SDimitry Andric     // `oldSym->value + oldOffset == newSym + newOffset`. However, we don't
135981ad6265SDimitry Andric     // have an easy way to access the offsets from this point in the code; some
136081ad6265SDimitry Andric     // refactoring is needed for that.
136181ad6265SDimitry Andric     macho::Reloc &pcReloc = Invert ? minuend : subtrahend;
136281ad6265SDimitry Andric     pcReloc.referent = isec->symbols[0];
136381ad6265SDimitry Andric     assert(isec->symbols[0]->value == 0);
136481ad6265SDimitry Andric     minuend.addend = pcReloc.offset * (Invert ? 1LL : -1LL);
136581ad6265SDimitry Andric   }
136681ad6265SDimitry Andric   return target;
136781ad6265SDimitry Andric }
136881ad6265SDimitry Andric 
findSymbolAtAddress(const std::vector<Section * > & sections,uint64_t addr)136981ad6265SDimitry Andric Defined *findSymbolAtAddress(const std::vector<Section *> &sections,
137081ad6265SDimitry Andric                              uint64_t addr) {
137181ad6265SDimitry Andric   Section *sec = findContainingSection(sections, &addr);
137281ad6265SDimitry Andric   auto *isec = cast<ConcatInputSection>(findContainingSubsection(*sec, &addr));
137381ad6265SDimitry Andric   return findSymbolAtOffset(isec, addr);
137481ad6265SDimitry Andric }
137581ad6265SDimitry Andric 
137681ad6265SDimitry Andric // For symbols that don't have compact unwind info, associate them with the more
137781ad6265SDimitry Andric // general-purpose (and verbose) DWARF unwind info found in __eh_frame.
137881ad6265SDimitry Andric //
137981ad6265SDimitry Andric // This requires us to parse the contents of __eh_frame. See EhFrame.h for a
138081ad6265SDimitry Andric // description of its format.
138181ad6265SDimitry Andric //
138281ad6265SDimitry Andric // While parsing, we also look for what MC calls "abs-ified" relocations -- they
138381ad6265SDimitry Andric // are relocations which are implicitly encoded as offsets in the section data.
138481ad6265SDimitry Andric // We convert them into explicit Reloc structs so that the EH frames can be
138581ad6265SDimitry Andric // handled just like a regular ConcatInputSection later in our output phase.
138681ad6265SDimitry Andric //
138781ad6265SDimitry Andric // We also need to handle the case where our input object file has explicit
138881ad6265SDimitry Andric // relocations. This is the case when e.g. it's the output of `ld -r`. We only
138981ad6265SDimitry Andric // look for the "abs-ified" relocation if an explicit relocation is absent.
registerEhFrames(Section & ehFrameSection)139081ad6265SDimitry Andric void ObjFile::registerEhFrames(Section &ehFrameSection) {
139181ad6265SDimitry Andric   DenseMap<const InputSection *, CIE> cieMap;
139281ad6265SDimitry Andric   for (const Subsection &subsec : ehFrameSection.subsections) {
139381ad6265SDimitry Andric     auto *isec = cast<ConcatInputSection>(subsec.isec);
139481ad6265SDimitry Andric     uint64_t isecOff = subsec.offset;
139581ad6265SDimitry Andric 
139681ad6265SDimitry Andric     // Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
139781ad6265SDimitry Andric     // that all EH frames have an associated symbol so that we can generate
139881ad6265SDimitry Andric     // subtractor relocs that reference them.
139981ad6265SDimitry Andric     if (isec->symbols.size() == 0)
1400bdd1243dSDimitry Andric       make<Defined>("EH_Frame", isec->getFile(), isec, /*value=*/0,
1401bdd1243dSDimitry Andric                     isec->getSize(), /*isWeakDef=*/false, /*isExternal=*/false,
1402bdd1243dSDimitry Andric                     /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
140306c3fb27SDimitry Andric                     /*isReferencedDynamically=*/false,
1404bdd1243dSDimitry Andric                     /*noDeadStrip=*/false);
140581ad6265SDimitry Andric     else if (isec->symbols[0]->value != 0)
140681ad6265SDimitry Andric       fatal("found symbol at unexpected offset in __eh_frame");
140781ad6265SDimitry Andric 
140861cfbce3SDimitry Andric     EhReader reader(this, isec->data, subsec.offset);
140981ad6265SDimitry Andric     size_t dataOff = 0; // Offset from the start of the EH frame.
141081ad6265SDimitry Andric     reader.skipValidLength(&dataOff); // readLength() already validated this.
141181ad6265SDimitry Andric     // cieOffOff is the offset from the start of the EH frame to the cieOff
141281ad6265SDimitry Andric     // value, which is itself an offset from the current PC to a CIE.
141381ad6265SDimitry Andric     const size_t cieOffOff = dataOff;
141481ad6265SDimitry Andric 
141581ad6265SDimitry Andric     EhRelocator ehRelocator(isec);
141681ad6265SDimitry Andric     auto cieOffRelocIt = llvm::find_if(
141781ad6265SDimitry Andric         isec->relocs, [=](const Reloc &r) { return r.offset == cieOffOff; });
141881ad6265SDimitry Andric     InputSection *cieIsec = nullptr;
141981ad6265SDimitry Andric     if (cieOffRelocIt != isec->relocs.end()) {
142081ad6265SDimitry Andric       // We already have an explicit relocation for the CIE offset.
142181ad6265SDimitry Andric       cieIsec =
142281ad6265SDimitry Andric           targetSymFromCanonicalSubtractor</*Invert=*/true>(isec, cieOffRelocIt)
142381ad6265SDimitry Andric               ->isec;
142481ad6265SDimitry Andric       dataOff += sizeof(uint32_t);
142581ad6265SDimitry Andric     } else {
142681ad6265SDimitry Andric       // If we haven't found a relocation, then the CIE offset is most likely
142781ad6265SDimitry Andric       // embedded in the section data (AKA an "abs-ified" reloc.). Parse that
142881ad6265SDimitry Andric       // and generate a Reloc struct.
142981ad6265SDimitry Andric       uint32_t cieMinuend = reader.readU32(&dataOff);
1430bdd1243dSDimitry Andric       if (cieMinuend == 0) {
143181ad6265SDimitry Andric         cieIsec = isec;
1432bdd1243dSDimitry Andric       } else {
143381ad6265SDimitry Andric         uint32_t cieOff = isecOff + dataOff - cieMinuend;
143481ad6265SDimitry Andric         cieIsec = findContainingSubsection(ehFrameSection, &cieOff);
143581ad6265SDimitry Andric         if (cieIsec == nullptr)
143681ad6265SDimitry Andric           fatal("failed to find CIE");
143781ad6265SDimitry Andric       }
143881ad6265SDimitry Andric       if (cieIsec != isec)
143981ad6265SDimitry Andric         ehRelocator.makeNegativePcRel(cieOffOff, cieIsec->symbols[0],
144081ad6265SDimitry Andric                                       /*length=*/2);
144181ad6265SDimitry Andric     }
144281ad6265SDimitry Andric     if (cieIsec == isec) {
144381ad6265SDimitry Andric       cieMap[cieIsec] = parseCIE(isec, reader, dataOff);
144481ad6265SDimitry Andric       continue;
144581ad6265SDimitry Andric     }
144681ad6265SDimitry Andric 
144781ad6265SDimitry Andric     assert(cieMap.count(cieIsec));
144881ad6265SDimitry Andric     const CIE &cie = cieMap[cieIsec];
144961cfbce3SDimitry Andric     // Offset of the function address within the EH frame.
145061cfbce3SDimitry Andric     const size_t funcAddrOff = dataOff;
145161cfbce3SDimitry Andric     uint64_t funcAddr = reader.readPointer(&dataOff, cie.funcPtrSize) +
145261cfbce3SDimitry Andric                         ehFrameSection.addr + isecOff + funcAddrOff;
145361cfbce3SDimitry Andric     uint32_t funcLength = reader.readPointer(&dataOff, cie.funcPtrSize);
145461cfbce3SDimitry Andric     size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
1455bdd1243dSDimitry Andric     std::optional<uint64_t> lsdaAddrOpt;
145681ad6265SDimitry Andric     if (cie.fdesHaveAug) {
145781ad6265SDimitry Andric       reader.skipLeb128(&dataOff);
145881ad6265SDimitry Andric       lsdaAddrOff = dataOff;
145961cfbce3SDimitry Andric       if (cie.lsdaPtrSize != 0) {
146061cfbce3SDimitry Andric         uint64_t lsdaOff = reader.readPointer(&dataOff, cie.lsdaPtrSize);
146181ad6265SDimitry Andric         if (lsdaOff != 0) // FIXME possible to test this?
146281ad6265SDimitry Andric           lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
146381ad6265SDimitry Andric       }
146481ad6265SDimitry Andric     }
146581ad6265SDimitry Andric 
146681ad6265SDimitry Andric     auto funcAddrRelocIt = isec->relocs.end();
146781ad6265SDimitry Andric     auto lsdaAddrRelocIt = isec->relocs.end();
146881ad6265SDimitry Andric     for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
146981ad6265SDimitry Andric       if (it->offset == funcAddrOff)
147081ad6265SDimitry Andric         funcAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
147181ad6265SDimitry Andric       else if (lsdaAddrOpt && it->offset == lsdaAddrOff)
147281ad6265SDimitry Andric         lsdaAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
147381ad6265SDimitry Andric     }
147481ad6265SDimitry Andric 
147581ad6265SDimitry Andric     Defined *funcSym;
147681ad6265SDimitry Andric     if (funcAddrRelocIt != isec->relocs.end()) {
147781ad6265SDimitry Andric       funcSym = targetSymFromCanonicalSubtractor(isec, funcAddrRelocIt);
1478fcaf7f86SDimitry Andric       // Canonicalize the symbol. If there are multiple symbols at the same
1479fcaf7f86SDimitry Andric       // address, we want both `registerEhFrame` and `registerCompactUnwind`
1480fcaf7f86SDimitry Andric       // to register the unwind entry under same symbol.
1481fcaf7f86SDimitry Andric       // This is not particularly efficient, but we should run into this case
1482fcaf7f86SDimitry Andric       // infrequently (only when handling the output of `ld -r`).
1483fcaf7f86SDimitry Andric       if (funcSym->isec)
1484fcaf7f86SDimitry Andric         funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec),
1485fcaf7f86SDimitry Andric                                      funcSym->value);
148681ad6265SDimitry Andric     } else {
148781ad6265SDimitry Andric       funcSym = findSymbolAtAddress(sections, funcAddr);
148881ad6265SDimitry Andric       ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
148981ad6265SDimitry Andric     }
149081ad6265SDimitry Andric     // The symbol has been coalesced, or already has a compact unwind entry.
149181ad6265SDimitry Andric     if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
149281ad6265SDimitry Andric       // We must prune unused FDEs for correctness, so we cannot rely on
149381ad6265SDimitry Andric       // -dead_strip being enabled.
149481ad6265SDimitry Andric       isec->live = false;
149581ad6265SDimitry Andric       continue;
149681ad6265SDimitry Andric     }
149781ad6265SDimitry Andric 
149881ad6265SDimitry Andric     InputSection *lsdaIsec = nullptr;
149981ad6265SDimitry Andric     if (lsdaAddrRelocIt != isec->relocs.end()) {
150081ad6265SDimitry Andric       lsdaIsec = targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec;
150181ad6265SDimitry Andric     } else if (lsdaAddrOpt) {
150281ad6265SDimitry Andric       uint64_t lsdaAddr = *lsdaAddrOpt;
150381ad6265SDimitry Andric       Section *sec = findContainingSection(sections, &lsdaAddr);
150481ad6265SDimitry Andric       lsdaIsec =
150581ad6265SDimitry Andric           cast<ConcatInputSection>(findContainingSubsection(*sec, &lsdaAddr));
150681ad6265SDimitry Andric       ehRelocator.makePcRel(lsdaAddrOff, lsdaIsec, target->p2WordSize);
150781ad6265SDimitry Andric     }
150881ad6265SDimitry Andric 
150981ad6265SDimitry Andric     fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
151081ad6265SDimitry Andric     funcSym->unwindEntry = isec;
151181ad6265SDimitry Andric     ehRelocator.commit();
151281ad6265SDimitry Andric   }
15136246ae0bSDimitry Andric 
15146246ae0bSDimitry Andric   // __eh_frame is marked as S_ATTR_LIVE_SUPPORT in input files, because FDEs
15156246ae0bSDimitry Andric   // are normally required to be kept alive if they reference a live symbol.
15166246ae0bSDimitry Andric   // However, we've explicitly created a dependency from a symbol to its FDE, so
15176246ae0bSDimitry Andric   // dead-stripping will just work as usual, and S_ATTR_LIVE_SUPPORT will only
15186246ae0bSDimitry Andric   // serve to incorrectly prevent us from dead-stripping duplicate FDEs for a
15196246ae0bSDimitry Andric   // live symbol (e.g. if there were multiple weak copies). Remove this flag to
15206246ae0bSDimitry Andric   // let dead-stripping proceed correctly.
15216246ae0bSDimitry Andric   ehFrameSection.flags &= ~S_ATTR_LIVE_SUPPORT;
152281ad6265SDimitry Andric }
152381ad6265SDimitry Andric 
sourceFile() const152481ad6265SDimitry Andric std::string ObjFile::sourceFile() const {
1525*5f757f3fSDimitry Andric   const char *unitName = compileUnit->getUnitDIE().getShortName();
1526*5f757f3fSDimitry Andric   // DWARF allows DW_AT_name to be absolute, in which case nothing should be
1527*5f757f3fSDimitry Andric   // prepended. As for the styles, debug info can contain paths from any OS, not
1528*5f757f3fSDimitry Andric   // necessarily an OS we're currently running on. Moreover different
1529*5f757f3fSDimitry Andric   // compilation units can be compiled on different operating systems and linked
1530*5f757f3fSDimitry Andric   // together later.
1531*5f757f3fSDimitry Andric   if (sys::path::is_absolute(unitName, llvm::sys::path::Style::posix) ||
1532*5f757f3fSDimitry Andric       sys::path::is_absolute(unitName, llvm::sys::path::Style::windows))
1533*5f757f3fSDimitry Andric     return unitName;
153481ad6265SDimitry Andric   SmallString<261> dir(compileUnit->getCompilationDir());
153581ad6265SDimitry Andric   StringRef sep = sys::path::get_separator();
153681ad6265SDimitry Andric   // We don't use `path::append` here because we want an empty `dir` to result
153781ad6265SDimitry Andric   // in an absolute path. `append` would give us a relative path for that case.
1538*5f757f3fSDimitry Andric   if (!dir.ends_with(sep))
153981ad6265SDimitry Andric     dir += sep;
1540*5f757f3fSDimitry Andric   return (dir + unitName).str();
154181ad6265SDimitry Andric }
154281ad6265SDimitry Andric 
getDwarf()154381ad6265SDimitry Andric lld::DWARFCache *ObjFile::getDwarf() {
154481ad6265SDimitry Andric   llvm::call_once(initDwarf, [this]() {
154581ad6265SDimitry Andric     auto dwObj = DwarfObject::create(this);
154681ad6265SDimitry Andric     if (!dwObj)
154781ad6265SDimitry Andric       return;
154881ad6265SDimitry Andric     dwarfCache = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
154981ad6265SDimitry Andric         std::move(dwObj), "",
155081ad6265SDimitry Andric         [&](Error err) { warn(getName() + ": " + toString(std::move(err))); },
155181ad6265SDimitry Andric         [&](Error warning) {
155281ad6265SDimitry Andric           warn(getName() + ": " + toString(std::move(warning)));
155381ad6265SDimitry Andric         }));
155481ad6265SDimitry Andric   });
155581ad6265SDimitry Andric 
155681ad6265SDimitry Andric   return dwarfCache.get();
155781ad6265SDimitry Andric }
1558e8d8bef9SDimitry Andric // The path can point to either a dylib or a .tbd file.
loadDylib(StringRef path,DylibFile * umbrella)1559fe6060f1SDimitry Andric static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
1560bdd1243dSDimitry Andric   std::optional<MemoryBufferRef> mbref = readFile(path);
1561e8d8bef9SDimitry Andric   if (!mbref) {
1562e8d8bef9SDimitry Andric     error("could not read dylib file at " + path);
1563fe6060f1SDimitry Andric     return nullptr;
1564e8d8bef9SDimitry Andric   }
1565e8d8bef9SDimitry Andric   return loadDylib(*mbref, umbrella);
1566e8d8bef9SDimitry Andric }
1567e8d8bef9SDimitry Andric 
1568e8d8bef9SDimitry Andric // TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
1569e8d8bef9SDimitry Andric // the first document storing child pointers to the rest of them. When we are
1570fe6060f1SDimitry Andric // processing a given TBD file, we store that top-level document in
1571fe6060f1SDimitry Andric // currentTopLevelTapi. When processing re-exports, we search its children for
1572fe6060f1SDimitry Andric // potentially matching documents in the same TBD file. Note that the children
1573fe6060f1SDimitry Andric // themselves don't point to further documents, i.e. this is a two-level tree.
1574e8d8bef9SDimitry Andric //
1575e8d8bef9SDimitry Andric // Re-exports can either refer to on-disk files, or to documents within .tbd
1576e8d8bef9SDimitry Andric // files.
findDylib(StringRef path,DylibFile * umbrella,const InterfaceFile * currentTopLevelTapi)1577fe6060f1SDimitry Andric static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
1578fe6060f1SDimitry Andric                             const InterfaceFile *currentTopLevelTapi) {
1579fe6060f1SDimitry Andric   // Search order:
1580fe6060f1SDimitry Andric   // 1. Install name basename in -F / -L directories.
1581fe6060f1SDimitry Andric   {
1582fe6060f1SDimitry Andric     StringRef stem = path::stem(path);
1583fe6060f1SDimitry Andric     SmallString<128> frameworkName;
1584fe6060f1SDimitry Andric     path::append(frameworkName, path::Style::posix, stem + ".framework", stem);
158506c3fb27SDimitry Andric     bool isFramework = path.ends_with(frameworkName);
1586fe6060f1SDimitry Andric     if (isFramework) {
1587fe6060f1SDimitry Andric       for (StringRef dir : config->frameworkSearchPaths) {
1588fe6060f1SDimitry Andric         SmallString<128> candidate = dir;
1589fe6060f1SDimitry Andric         path::append(candidate, frameworkName);
1590bdd1243dSDimitry Andric         if (std::optional<StringRef> dylibPath =
1591bdd1243dSDimitry Andric                 resolveDylibPath(candidate.str()))
1592fe6060f1SDimitry Andric           return loadDylib(*dylibPath, umbrella);
1593fe6060f1SDimitry Andric       }
1594bdd1243dSDimitry Andric     } else if (std::optional<StringRef> dylibPath = findPathCombination(
159506c3fb27SDimitry Andric                    stem, config->librarySearchPaths, {".tbd", ".dylib", ".so"}))
1596fe6060f1SDimitry Andric       return loadDylib(*dylibPath, umbrella);
1597fe6060f1SDimitry Andric   }
1598fe6060f1SDimitry Andric 
1599fe6060f1SDimitry Andric   // 2. As absolute path.
1600e8d8bef9SDimitry Andric   if (path::is_absolute(path, path::Style::posix))
1601e8d8bef9SDimitry Andric     for (StringRef root : config->systemLibraryRoots)
1602bdd1243dSDimitry Andric       if (std::optional<StringRef> dylibPath =
1603bdd1243dSDimitry Andric               resolveDylibPath((root + path).str()))
1604e8d8bef9SDimitry Andric         return loadDylib(*dylibPath, umbrella);
1605e8d8bef9SDimitry Andric 
1606fe6060f1SDimitry Andric   // 3. As relative path.
1607e8d8bef9SDimitry Andric 
1608fe6060f1SDimitry Andric   // TODO: Handle -dylib_file
1609fe6060f1SDimitry Andric 
1610fe6060f1SDimitry Andric   // Replace @executable_path, @loader_path, @rpath prefixes in install name.
1611fe6060f1SDimitry Andric   SmallString<128> newPath;
1612fe6060f1SDimitry Andric   if (config->outputType == MH_EXECUTE &&
1613fe6060f1SDimitry Andric       path.consume_front("@executable_path/")) {
1614fe6060f1SDimitry Andric     // ld64 allows overriding this with the undocumented flag -executable_path.
1615fe6060f1SDimitry Andric     // lld doesn't currently implement that flag.
1616fe6060f1SDimitry Andric     // FIXME: Consider using finalOutput instead of outputFile.
1617fe6060f1SDimitry Andric     path::append(newPath, path::parent_path(config->outputFile), path);
1618fe6060f1SDimitry Andric     path = newPath;
1619fe6060f1SDimitry Andric   } else if (path.consume_front("@loader_path/")) {
1620fe6060f1SDimitry Andric     fs::real_path(umbrella->getName(), newPath);
1621fe6060f1SDimitry Andric     path::remove_filename(newPath);
1622fe6060f1SDimitry Andric     path::append(newPath, path);
1623fe6060f1SDimitry Andric     path = newPath;
162406c3fb27SDimitry Andric   } else if (path.starts_with("@rpath/")) {
1625fe6060f1SDimitry Andric     for (StringRef rpath : umbrella->rpaths) {
1626fe6060f1SDimitry Andric       newPath.clear();
1627fe6060f1SDimitry Andric       if (rpath.consume_front("@loader_path/")) {
1628fe6060f1SDimitry Andric         fs::real_path(umbrella->getName(), newPath);
1629fe6060f1SDimitry Andric         path::remove_filename(newPath);
1630fe6060f1SDimitry Andric       }
1631fe6060f1SDimitry Andric       path::append(newPath, rpath, path.drop_front(strlen("@rpath/")));
1632bdd1243dSDimitry Andric       if (std::optional<StringRef> dylibPath = resolveDylibPath(newPath.str()))
1633fe6060f1SDimitry Andric         return loadDylib(*dylibPath, umbrella);
1634fe6060f1SDimitry Andric     }
1635fe6060f1SDimitry Andric   }
1636fe6060f1SDimitry Andric 
1637fe6060f1SDimitry Andric   // FIXME: Should this be further up?
1638e8d8bef9SDimitry Andric   if (currentTopLevelTapi) {
1639e8d8bef9SDimitry Andric     for (InterfaceFile &child :
1640e8d8bef9SDimitry Andric          make_pointee_range(currentTopLevelTapi->documents())) {
1641e8d8bef9SDimitry Andric       assert(child.documents().empty());
1642fe6060f1SDimitry Andric       if (path == child.getInstallName()) {
164306c3fb27SDimitry Andric         auto *file = make<DylibFile>(child, umbrella, /*isBundleLoader=*/false,
164481ad6265SDimitry Andric                                      /*explicitlyLinked=*/false);
1645fe6060f1SDimitry Andric         file->parseReexports(child);
1646fe6060f1SDimitry Andric         return file;
1647fe6060f1SDimitry Andric       }
1648e8d8bef9SDimitry Andric     }
1649e8d8bef9SDimitry Andric   }
1650e8d8bef9SDimitry Andric 
1651bdd1243dSDimitry Andric   if (std::optional<StringRef> dylibPath = resolveDylibPath(path))
1652e8d8bef9SDimitry Andric     return loadDylib(*dylibPath, umbrella);
1653e8d8bef9SDimitry Andric 
1654fe6060f1SDimitry Andric   return nullptr;
1655e8d8bef9SDimitry Andric }
1656e8d8bef9SDimitry Andric 
1657e8d8bef9SDimitry Andric // If a re-exported dylib is public (lives in /usr/lib or
1658e8d8bef9SDimitry Andric // /System/Library/Frameworks), then it is considered implicitly linked: we
1659e8d8bef9SDimitry Andric // should bind to its symbols directly instead of via the re-exporting umbrella
1660e8d8bef9SDimitry Andric // library.
isImplicitlyLinked(StringRef path)1661e8d8bef9SDimitry Andric static bool isImplicitlyLinked(StringRef path) {
1662e8d8bef9SDimitry Andric   if (!config->implicitDylibs)
1663e8d8bef9SDimitry Andric     return false;
1664e8d8bef9SDimitry Andric 
1665e8d8bef9SDimitry Andric   if (path::parent_path(path) == "/usr/lib")
1666e8d8bef9SDimitry Andric     return true;
1667e8d8bef9SDimitry Andric 
1668e8d8bef9SDimitry Andric   // Match /System/Library/Frameworks/$FOO.framework/**/$FOO
1669e8d8bef9SDimitry Andric   if (path.consume_front("/System/Library/Frameworks/")) {
1670e8d8bef9SDimitry Andric     StringRef frameworkName = path.take_until([](char c) { return c == '.'; });
1671e8d8bef9SDimitry Andric     return path::filename(path) == frameworkName;
1672e8d8bef9SDimitry Andric   }
1673e8d8bef9SDimitry Andric 
1674e8d8bef9SDimitry Andric   return false;
1675e8d8bef9SDimitry Andric }
1676e8d8bef9SDimitry Andric 
loadReexport(StringRef path,DylibFile * umbrella,const InterfaceFile * currentTopLevelTapi)1677bdd1243dSDimitry Andric void DylibFile::loadReexport(StringRef path, DylibFile *umbrella,
1678fe6060f1SDimitry Andric                          const InterfaceFile *currentTopLevelTapi) {
1679fe6060f1SDimitry Andric   DylibFile *reexport = findDylib(path, umbrella, currentTopLevelTapi);
1680fe6060f1SDimitry Andric   if (!reexport)
1681bdd1243dSDimitry Andric     error(toString(this) + ": unable to locate re-export with install name " +
1682bdd1243dSDimitry Andric           path);
16835ffd83dbSDimitry Andric }
16845ffd83dbSDimitry Andric 
DylibFile(MemoryBufferRef mb,DylibFile * umbrella,bool isBundleLoader,bool explicitlyLinked)1685fe6060f1SDimitry Andric DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
168681ad6265SDimitry Andric                      bool isBundleLoader, bool explicitlyLinked)
1687fe6060f1SDimitry Andric     : InputFile(DylibKind, mb), refState(RefState::Unreferenced),
168881ad6265SDimitry Andric       explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1689fe6060f1SDimitry Andric   assert(!isBundleLoader || !umbrella);
16905ffd83dbSDimitry Andric   if (umbrella == nullptr)
16915ffd83dbSDimitry Andric     umbrella = this;
1692fe6060f1SDimitry Andric   this->umbrella = umbrella;
16935ffd83dbSDimitry Andric 
1694fe6060f1SDimitry Andric   auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
16955ffd83dbSDimitry Andric 
1696fe6060f1SDimitry Andric   // Initialize installName.
16975ffd83dbSDimitry Andric   if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
16985ffd83dbSDimitry Andric     auto *c = reinterpret_cast<const dylib_command *>(cmd);
1699e8d8bef9SDimitry Andric     currentVersion = read32le(&c->dylib.current_version);
1700e8d8bef9SDimitry Andric     compatibilityVersion = read32le(&c->dylib.compatibility_version);
1701fe6060f1SDimitry Andric     installName =
1702fe6060f1SDimitry Andric         reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
1703fe6060f1SDimitry Andric   } else if (!isBundleLoader) {
1704fe6060f1SDimitry Andric     // macho_executable and macho_bundle don't have LC_ID_DYLIB,
1705fe6060f1SDimitry Andric     // so it's OK.
1706bdd1243dSDimitry Andric     error(toString(this) + ": dylib missing LC_ID_DYLIB load command");
17075ffd83dbSDimitry Andric     return;
17085ffd83dbSDimitry Andric   }
17095ffd83dbSDimitry Andric 
1710fe6060f1SDimitry Andric   if (config->printEachFile)
1711fe6060f1SDimitry Andric     message(toString(this));
1712fe6060f1SDimitry Andric   inputFiles.insert(this);
1713fe6060f1SDimitry Andric 
1714fe6060f1SDimitry Andric   deadStrippable = hdr->flags & MH_DEAD_STRIPPABLE_DYLIB;
1715fe6060f1SDimitry Andric 
1716fe6060f1SDimitry Andric   if (!checkCompatibility(this))
1717fe6060f1SDimitry Andric     return;
1718fe6060f1SDimitry Andric 
1719fe6060f1SDimitry Andric   checkAppExtensionSafety(hdr->flags & MH_APP_EXTENSION_SAFE);
1720fe6060f1SDimitry Andric 
1721fe6060f1SDimitry Andric   for (auto *cmd : findCommands<rpath_command>(hdr, LC_RPATH)) {
1722fe6060f1SDimitry Andric     StringRef rpath{reinterpret_cast<const char *>(cmd) + cmd->path};
1723fe6060f1SDimitry Andric     rpaths.push_back(rpath);
1724fe6060f1SDimitry Andric   }
1725fe6060f1SDimitry Andric 
17265ffd83dbSDimitry Andric   // Initialize symbols.
1727fe6060f1SDimitry Andric   exportingFile = isImplicitlyLinked(installName) ? this : this->umbrella;
1728753f127fSDimitry Andric 
1729753f127fSDimitry Andric   const auto *dyldInfo = findCommand<dyld_info_command>(hdr, LC_DYLD_INFO_ONLY);
1730753f127fSDimitry Andric   const auto *exportsTrie =
1731753f127fSDimitry Andric       findCommand<linkedit_data_command>(hdr, LC_DYLD_EXPORTS_TRIE);
1732753f127fSDimitry Andric   if (dyldInfo && exportsTrie) {
1733753f127fSDimitry Andric     // It's unclear what should happen in this case. Maybe we should only error
1734753f127fSDimitry Andric     // out if the two load commands refer to different data?
1735bdd1243dSDimitry Andric     error(toString(this) +
1736bdd1243dSDimitry Andric           ": dylib has both LC_DYLD_INFO_ONLY and LC_DYLD_EXPORTS_TRIE");
1737753f127fSDimitry Andric     return;
173806c3fb27SDimitry Andric   }
173906c3fb27SDimitry Andric 
174006c3fb27SDimitry Andric   if (dyldInfo) {
1741753f127fSDimitry Andric     parseExportedSymbols(dyldInfo->export_off, dyldInfo->export_size);
1742753f127fSDimitry Andric   } else if (exportsTrie) {
1743753f127fSDimitry Andric     parseExportedSymbols(exportsTrie->dataoff, exportsTrie->datasize);
1744753f127fSDimitry Andric   } else {
1745753f127fSDimitry Andric     error("No LC_DYLD_INFO_ONLY or LC_DYLD_EXPORTS_TRIE found in " +
1746753f127fSDimitry Andric           toString(this));
1747753f127fSDimitry Andric   }
1748753f127fSDimitry Andric }
1749753f127fSDimitry Andric 
parseExportedSymbols(uint32_t offset,uint32_t size)1750753f127fSDimitry Andric void DylibFile::parseExportedSymbols(uint32_t offset, uint32_t size) {
17510eae32dcSDimitry Andric   struct TrieEntry {
17520eae32dcSDimitry Andric     StringRef name;
17530eae32dcSDimitry Andric     uint64_t flags;
17540eae32dcSDimitry Andric   };
17550eae32dcSDimitry Andric 
1756753f127fSDimitry Andric   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
17570eae32dcSDimitry Andric   std::vector<TrieEntry> entries;
17580eae32dcSDimitry Andric   // Find all the $ld$* symbols to process first.
1759753f127fSDimitry Andric   parseTrie(buf + offset, size, [&](const Twine &name, uint64_t flags) {
176004eeddc0SDimitry Andric     StringRef savedName = saver().save(name);
1761fe6060f1SDimitry Andric     if (handleLDSymbol(savedName))
1762fe6060f1SDimitry Andric       return;
17630eae32dcSDimitry Andric     entries.push_back({savedName, flags});
17645ffd83dbSDimitry Andric   });
17650eae32dcSDimitry Andric 
17660eae32dcSDimitry Andric   // Process the "normal" symbols.
17670eae32dcSDimitry Andric   for (TrieEntry &entry : entries) {
1768753f127fSDimitry Andric     if (exportingFile->hiddenSymbols.contains(CachedHashStringRef(entry.name)))
17690eae32dcSDimitry Andric       continue;
17700eae32dcSDimitry Andric 
17710eae32dcSDimitry Andric     bool isWeakDef = entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
17720eae32dcSDimitry Andric     bool isTlv = entry.flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
17730eae32dcSDimitry Andric 
17740eae32dcSDimitry Andric     symbols.push_back(
17750eae32dcSDimitry Andric         symtab->addDylib(entry.name, exportingFile, isWeakDef, isTlv));
17760eae32dcSDimitry Andric   }
1777fe6060f1SDimitry Andric }
17785ffd83dbSDimitry Andric 
parseLoadCommands(MemoryBufferRef mb)1779fe6060f1SDimitry Andric void DylibFile::parseLoadCommands(MemoryBufferRef mb) {
1780fe6060f1SDimitry Andric   auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
1781fe6060f1SDimitry Andric   const uint8_t *p = reinterpret_cast<const uint8_t *>(mb.getBufferStart()) +
1782fe6060f1SDimitry Andric                      target->headerSize;
17835ffd83dbSDimitry Andric   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
17845ffd83dbSDimitry Andric     auto *cmd = reinterpret_cast<const load_command *>(p);
17855ffd83dbSDimitry Andric     p += cmd->cmdsize;
17865ffd83dbSDimitry Andric 
1787fe6060f1SDimitry Andric     if (!(hdr->flags & MH_NO_REEXPORTED_DYLIBS) &&
1788fe6060f1SDimitry Andric         cmd->cmd == LC_REEXPORT_DYLIB) {
1789fe6060f1SDimitry Andric       const auto *c = reinterpret_cast<const dylib_command *>(cmd);
17905ffd83dbSDimitry Andric       StringRef reexportPath =
17915ffd83dbSDimitry Andric           reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
1792fe6060f1SDimitry Andric       loadReexport(reexportPath, exportingFile, nullptr);
1793fe6060f1SDimitry Andric     }
1794fe6060f1SDimitry Andric 
1795fe6060f1SDimitry Andric     // FIXME: What about LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB,
1796fe6060f1SDimitry Andric     // LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB (..are reexports from dylibs with
1797fe6060f1SDimitry Andric     // MH_NO_REEXPORTED_DYLIBS loaded for -flat_namespace)?
1798fe6060f1SDimitry Andric     if (config->namespaceKind == NamespaceKind::flat &&
1799fe6060f1SDimitry Andric         cmd->cmd == LC_LOAD_DYLIB) {
1800fe6060f1SDimitry Andric       const auto *c = reinterpret_cast<const dylib_command *>(cmd);
1801fe6060f1SDimitry Andric       StringRef dylibPath =
1802fe6060f1SDimitry Andric           reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
1803fe6060f1SDimitry Andric       DylibFile *dylib = findDylib(dylibPath, umbrella, nullptr);
1804fe6060f1SDimitry Andric       if (!dylib)
1805fe6060f1SDimitry Andric         error(Twine("unable to locate library '") + dylibPath +
1806fe6060f1SDimitry Andric               "' loaded from '" + toString(this) + "' for -flat_namespace");
1807fe6060f1SDimitry Andric     }
18085ffd83dbSDimitry Andric   }
18095ffd83dbSDimitry Andric }
18105ffd83dbSDimitry Andric 
181181ad6265SDimitry Andric // Some versions of Xcode ship with .tbd files that don't have the right
1812fe6060f1SDimitry Andric // platform settings.
181381ad6265SDimitry Andric constexpr std::array<StringRef, 3> skipPlatformChecks{
1814fe6060f1SDimitry Andric     "/usr/lib/system/libsystem_kernel.dylib",
1815fe6060f1SDimitry Andric     "/usr/lib/system/libsystem_platform.dylib",
1816fe6060f1SDimitry Andric     "/usr/lib/system/libsystem_pthread.dylib"};
1817fe6060f1SDimitry Andric 
skipPlatformCheckForCatalyst(const InterfaceFile & interface,bool explicitlyLinked)181881ad6265SDimitry Andric static bool skipPlatformCheckForCatalyst(const InterfaceFile &interface,
181981ad6265SDimitry Andric                                          bool explicitlyLinked) {
182081ad6265SDimitry Andric   // Catalyst outputs can link against implicitly linked macOS-only libraries.
182181ad6265SDimitry Andric   if (config->platform() != PLATFORM_MACCATALYST || explicitlyLinked)
182281ad6265SDimitry Andric     return false;
182381ad6265SDimitry Andric   return is_contained(interface.targets(),
182481ad6265SDimitry Andric                       MachO::Target(config->arch(), PLATFORM_MACOS));
182581ad6265SDimitry Andric }
182681ad6265SDimitry Andric 
isArchABICompatible(ArchitectureSet archSet,Architecture targetArch)1827bdd1243dSDimitry Andric static bool isArchABICompatible(ArchitectureSet archSet,
1828bdd1243dSDimitry Andric                                 Architecture targetArch) {
1829bdd1243dSDimitry Andric   uint32_t cpuType;
1830bdd1243dSDimitry Andric   uint32_t targetCpuType;
1831bdd1243dSDimitry Andric   std::tie(targetCpuType, std::ignore) = getCPUTypeFromArchitecture(targetArch);
1832bdd1243dSDimitry Andric 
1833bdd1243dSDimitry Andric   return llvm::any_of(archSet, [&](const auto &p) {
1834bdd1243dSDimitry Andric     std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(p);
1835bdd1243dSDimitry Andric     return cpuType == targetCpuType;
1836bdd1243dSDimitry Andric   });
1837bdd1243dSDimitry Andric }
1838bdd1243dSDimitry Andric 
isTargetPlatformArchCompatible(InterfaceFile::const_target_range interfaceTargets,Target target)1839bdd1243dSDimitry Andric static bool isTargetPlatformArchCompatible(
1840bdd1243dSDimitry Andric     InterfaceFile::const_target_range interfaceTargets, Target target) {
1841bdd1243dSDimitry Andric   if (is_contained(interfaceTargets, target))
1842bdd1243dSDimitry Andric     return true;
1843bdd1243dSDimitry Andric 
1844bdd1243dSDimitry Andric   if (config->forceExactCpuSubtypeMatch)
1845bdd1243dSDimitry Andric     return false;
1846bdd1243dSDimitry Andric 
1847bdd1243dSDimitry Andric   ArchitectureSet archSet;
1848bdd1243dSDimitry Andric   for (const auto &p : interfaceTargets)
1849bdd1243dSDimitry Andric     if (p.Platform == target.Platform)
1850bdd1243dSDimitry Andric       archSet.set(p.Arch);
1851bdd1243dSDimitry Andric   if (archSet.empty())
1852bdd1243dSDimitry Andric     return false;
1853bdd1243dSDimitry Andric 
1854bdd1243dSDimitry Andric   return isArchABICompatible(archSet, target.Arch);
1855bdd1243dSDimitry Andric }
1856bdd1243dSDimitry Andric 
DylibFile(const InterfaceFile & interface,DylibFile * umbrella,bool isBundleLoader,bool explicitlyLinked)1857fe6060f1SDimitry Andric DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
185881ad6265SDimitry Andric                      bool isBundleLoader, bool explicitlyLinked)
1859fe6060f1SDimitry Andric     : InputFile(DylibKind, interface), refState(RefState::Unreferenced),
186081ad6265SDimitry Andric       explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
1861fe6060f1SDimitry Andric   // FIXME: Add test for the missing TBD code path.
1862fe6060f1SDimitry Andric 
18635ffd83dbSDimitry Andric   if (umbrella == nullptr)
18645ffd83dbSDimitry Andric     umbrella = this;
1865fe6060f1SDimitry Andric   this->umbrella = umbrella;
18665ffd83dbSDimitry Andric 
186704eeddc0SDimitry Andric   installName = saver().save(interface.getInstallName());
1868e8d8bef9SDimitry Andric   compatibilityVersion = interface.getCompatibilityVersion().rawValue();
1869e8d8bef9SDimitry Andric   currentVersion = interface.getCurrentVersion().rawValue();
1870fe6060f1SDimitry Andric 
1871fe6060f1SDimitry Andric   if (config->printEachFile)
1872fe6060f1SDimitry Andric     message(toString(this));
1873fe6060f1SDimitry Andric   inputFiles.insert(this);
1874fe6060f1SDimitry Andric 
1875fe6060f1SDimitry Andric   if (!is_contained(skipPlatformChecks, installName) &&
1876bdd1243dSDimitry Andric       !isTargetPlatformArchCompatible(interface.targets(),
1877bdd1243dSDimitry Andric                                       config->platformInfo.target) &&
187881ad6265SDimitry Andric       !skipPlatformCheckForCatalyst(interface, explicitlyLinked)) {
1879fe6060f1SDimitry Andric     error(toString(this) + " is incompatible with " +
1880fe6060f1SDimitry Andric           std::string(config->platformInfo.target));
1881fe6060f1SDimitry Andric     return;
1882fe6060f1SDimitry Andric   }
1883fe6060f1SDimitry Andric 
1884fe6060f1SDimitry Andric   checkAppExtensionSafety(interface.isApplicationExtensionSafe());
1885fe6060f1SDimitry Andric 
1886fe6060f1SDimitry Andric   exportingFile = isImplicitlyLinked(installName) ? this : umbrella;
1887bdd1243dSDimitry Andric   auto addSymbol = [&](const llvm::MachO::Symbol &symbol,
1888bdd1243dSDimitry Andric                        const Twine &name) -> void {
188904eeddc0SDimitry Andric     StringRef savedName = saver().save(name);
18900eae32dcSDimitry Andric     if (exportingFile->hiddenSymbols.contains(CachedHashStringRef(savedName)))
18910eae32dcSDimitry Andric       return;
18920eae32dcSDimitry Andric 
18930eae32dcSDimitry Andric     symbols.push_back(symtab->addDylib(savedName, exportingFile,
1894bdd1243dSDimitry Andric                                        symbol.isWeakDefined(),
1895bdd1243dSDimitry Andric                                        symbol.isThreadLocalValue()));
1896e8d8bef9SDimitry Andric   };
18970eae32dcSDimitry Andric 
18980eae32dcSDimitry Andric   std::vector<const llvm::MachO::Symbol *> normalSymbols;
18990eae32dcSDimitry Andric   normalSymbols.reserve(interface.symbolsCount());
1900fe6060f1SDimitry Andric   for (const auto *symbol : interface.symbols()) {
1901bdd1243dSDimitry Andric     if (!isArchABICompatible(symbol->getArchitectures(), config->arch()))
1902fe6060f1SDimitry Andric       continue;
1903fe6060f1SDimitry Andric     if (handleLDSymbol(symbol->getName()))
1904e8d8bef9SDimitry Andric       continue;
1905e8d8bef9SDimitry Andric 
1906e8d8bef9SDimitry Andric     switch (symbol->getKind()) {
1907bdd1243dSDimitry Andric     case SymbolKind::GlobalSymbol:
1908bdd1243dSDimitry Andric     case SymbolKind::ObjectiveCClass:
1909bdd1243dSDimitry Andric     case SymbolKind::ObjectiveCClassEHType:
1910bdd1243dSDimitry Andric     case SymbolKind::ObjectiveCInstanceVariable:
19110eae32dcSDimitry Andric       normalSymbols.push_back(symbol);
19120eae32dcSDimitry Andric     }
19130eae32dcSDimitry Andric   }
19140eae32dcSDimitry Andric 
19150eae32dcSDimitry Andric   // TODO(compnerd) filter out symbols based on the target platform
19160eae32dcSDimitry Andric   for (const auto *symbol : normalSymbols) {
19170eae32dcSDimitry Andric     switch (symbol->getKind()) {
1918e8d8bef9SDimitry Andric     case SymbolKind::GlobalSymbol:
1919bdd1243dSDimitry Andric       addSymbol(*symbol, symbol->getName());
1920e8d8bef9SDimitry Andric       break;
1921e8d8bef9SDimitry Andric     case SymbolKind::ObjectiveCClass:
1922e8d8bef9SDimitry Andric       // XXX ld64 only creates these symbols when -ObjC is passed in. We may
1923e8d8bef9SDimitry Andric       // want to emulate that.
1924bdd1243dSDimitry Andric       addSymbol(*symbol, objc::klass + symbol->getName());
1925bdd1243dSDimitry Andric       addSymbol(*symbol, objc::metaclass + symbol->getName());
1926e8d8bef9SDimitry Andric       break;
1927e8d8bef9SDimitry Andric     case SymbolKind::ObjectiveCClassEHType:
1928bdd1243dSDimitry Andric       addSymbol(*symbol, objc::ehtype + symbol->getName());
1929e8d8bef9SDimitry Andric       break;
1930e8d8bef9SDimitry Andric     case SymbolKind::ObjectiveCInstanceVariable:
1931bdd1243dSDimitry Andric       addSymbol(*symbol, objc::ivar + symbol->getName());
1932e8d8bef9SDimitry Andric       break;
1933e8d8bef9SDimitry Andric     }
19345ffd83dbSDimitry Andric   }
1935e8d8bef9SDimitry Andric }
1936e8d8bef9SDimitry Andric 
DylibFile(DylibFile * umbrella)193761cfbce3SDimitry Andric DylibFile::DylibFile(DylibFile *umbrella)
193861cfbce3SDimitry Andric     : InputFile(DylibKind, MemoryBufferRef{}), refState(RefState::Unreferenced),
193961cfbce3SDimitry Andric       explicitlyLinked(false), isBundleLoader(false) {
194061cfbce3SDimitry Andric   if (umbrella == nullptr)
194161cfbce3SDimitry Andric     umbrella = this;
194261cfbce3SDimitry Andric   this->umbrella = umbrella;
194361cfbce3SDimitry Andric }
194461cfbce3SDimitry Andric 
parseReexports(const InterfaceFile & interface)1945fe6060f1SDimitry Andric void DylibFile::parseReexports(const InterfaceFile &interface) {
1946fe6060f1SDimitry Andric   const InterfaceFile *topLevel =
1947fe6060f1SDimitry Andric       interface.getParent() == nullptr ? &interface : interface.getParent();
1948349cc55cSDimitry Andric   for (const InterfaceFileRef &intfRef : interface.reexportedLibraries()) {
1949fe6060f1SDimitry Andric     InterfaceFile::const_target_range targets = intfRef.targets();
1950fe6060f1SDimitry Andric     if (is_contained(skipPlatformChecks, intfRef.getInstallName()) ||
1951bdd1243dSDimitry Andric         isTargetPlatformArchCompatible(targets, config->platformInfo.target))
1952fe6060f1SDimitry Andric       loadReexport(intfRef.getInstallName(), exportingFile, topLevel);
1953fe6060f1SDimitry Andric   }
1954fe6060f1SDimitry Andric }
1955e8d8bef9SDimitry Andric 
isExplicitlyLinked() const195661cfbce3SDimitry Andric bool DylibFile::isExplicitlyLinked() const {
195761cfbce3SDimitry Andric   if (!explicitlyLinked)
195861cfbce3SDimitry Andric     return false;
195961cfbce3SDimitry Andric 
196061cfbce3SDimitry Andric   // If this dylib was explicitly linked, but at least one of the symbols
196161cfbce3SDimitry Andric   // of the synthetic dylibs it created via $ld$previous symbols is
196261cfbce3SDimitry Andric   // referenced, then that synthetic dylib fulfils the explicit linkedness
196361cfbce3SDimitry Andric   // and we can deadstrip this dylib if it's unreferenced.
196461cfbce3SDimitry Andric   for (const auto *dylib : extraDylibs)
196561cfbce3SDimitry Andric     if (dylib->isReferenced())
196661cfbce3SDimitry Andric       return false;
196761cfbce3SDimitry Andric 
196861cfbce3SDimitry Andric   return true;
196961cfbce3SDimitry Andric }
197061cfbce3SDimitry Andric 
getSyntheticDylib(StringRef installName,uint32_t currentVersion,uint32_t compatVersion)197161cfbce3SDimitry Andric DylibFile *DylibFile::getSyntheticDylib(StringRef installName,
197261cfbce3SDimitry Andric                                         uint32_t currentVersion,
197361cfbce3SDimitry Andric                                         uint32_t compatVersion) {
197461cfbce3SDimitry Andric   for (DylibFile *dylib : extraDylibs)
197561cfbce3SDimitry Andric     if (dylib->installName == installName) {
197661cfbce3SDimitry Andric       // FIXME: Check what to do if different $ld$previous symbols
197761cfbce3SDimitry Andric       // request the same dylib, but with different versions.
197861cfbce3SDimitry Andric       return dylib;
197961cfbce3SDimitry Andric     }
198061cfbce3SDimitry Andric 
198161cfbce3SDimitry Andric   auto *dylib = make<DylibFile>(umbrella == this ? nullptr : umbrella);
198261cfbce3SDimitry Andric   dylib->installName = saver().save(installName);
198361cfbce3SDimitry Andric   dylib->currentVersion = currentVersion;
198461cfbce3SDimitry Andric   dylib->compatibilityVersion = compatVersion;
198561cfbce3SDimitry Andric   extraDylibs.push_back(dylib);
198661cfbce3SDimitry Andric   return dylib;
198761cfbce3SDimitry Andric }
198861cfbce3SDimitry Andric 
1989fe6060f1SDimitry Andric // $ld$ symbols modify the properties/behavior of the library (e.g. its install
1990fe6060f1SDimitry Andric // name, compatibility version or hide/add symbols) for specific target
1991fe6060f1SDimitry Andric // versions.
handleLDSymbol(StringRef originalName)1992fe6060f1SDimitry Andric bool DylibFile::handleLDSymbol(StringRef originalName) {
199306c3fb27SDimitry Andric   if (!originalName.starts_with("$ld$"))
1994fe6060f1SDimitry Andric     return false;
1995fe6060f1SDimitry Andric 
1996fe6060f1SDimitry Andric   StringRef action;
1997fe6060f1SDimitry Andric   StringRef name;
1998fe6060f1SDimitry Andric   std::tie(action, name) = originalName.drop_front(strlen("$ld$")).split('$');
1999fe6060f1SDimitry Andric   if (action == "previous")
2000fe6060f1SDimitry Andric     handleLDPreviousSymbol(name, originalName);
2001fe6060f1SDimitry Andric   else if (action == "install_name")
2002fe6060f1SDimitry Andric     handleLDInstallNameSymbol(name, originalName);
20030eae32dcSDimitry Andric   else if (action == "hide")
20040eae32dcSDimitry Andric     handleLDHideSymbol(name, originalName);
2005fe6060f1SDimitry Andric   return true;
2006fe6060f1SDimitry Andric }
2007fe6060f1SDimitry Andric 
handleLDPreviousSymbol(StringRef name,StringRef originalName)2008fe6060f1SDimitry Andric void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) {
2009fe6060f1SDimitry Andric   // originalName: $ld$ previous $ <installname> $ <compatversion> $
2010fe6060f1SDimitry Andric   // <platformstr> $ <startversion> $ <endversion> $ <symbol-name> $
2011fe6060f1SDimitry Andric   StringRef installName;
2012fe6060f1SDimitry Andric   StringRef compatVersion;
2013fe6060f1SDimitry Andric   StringRef platformStr;
2014fe6060f1SDimitry Andric   StringRef startVersion;
2015fe6060f1SDimitry Andric   StringRef endVersion;
2016fe6060f1SDimitry Andric   StringRef symbolName;
2017fe6060f1SDimitry Andric   StringRef rest;
2018fe6060f1SDimitry Andric 
2019fe6060f1SDimitry Andric   std::tie(installName, name) = name.split('$');
2020fe6060f1SDimitry Andric   std::tie(compatVersion, name) = name.split('$');
2021fe6060f1SDimitry Andric   std::tie(platformStr, name) = name.split('$');
2022fe6060f1SDimitry Andric   std::tie(startVersion, name) = name.split('$');
2023fe6060f1SDimitry Andric   std::tie(endVersion, name) = name.split('$');
202461cfbce3SDimitry Andric   std::tie(symbolName, rest) = name.rsplit('$');
202561cfbce3SDimitry Andric 
202661cfbce3SDimitry Andric   // FIXME: Does this do the right thing for zippered files?
2027fe6060f1SDimitry Andric   unsigned platform;
2028fe6060f1SDimitry Andric   if (platformStr.getAsInteger(10, platform) ||
2029fe6060f1SDimitry Andric       platform != static_cast<unsigned>(config->platform()))
2030fe6060f1SDimitry Andric     return;
2031fe6060f1SDimitry Andric 
2032fe6060f1SDimitry Andric   VersionTuple start;
2033fe6060f1SDimitry Andric   if (start.tryParse(startVersion)) {
2034bdd1243dSDimitry Andric     warn(toString(this) + ": failed to parse start version, symbol '" +
2035bdd1243dSDimitry Andric          originalName + "' ignored");
2036fe6060f1SDimitry Andric     return;
2037fe6060f1SDimitry Andric   }
2038fe6060f1SDimitry Andric   VersionTuple end;
2039fe6060f1SDimitry Andric   if (end.tryParse(endVersion)) {
2040bdd1243dSDimitry Andric     warn(toString(this) + ": failed to parse end version, symbol '" +
2041bdd1243dSDimitry Andric          originalName + "' ignored");
2042fe6060f1SDimitry Andric     return;
2043fe6060f1SDimitry Andric   }
204406c3fb27SDimitry Andric   if (config->platformInfo.target.MinDeployment < start ||
204506c3fb27SDimitry Andric       config->platformInfo.target.MinDeployment >= end)
2046fe6060f1SDimitry Andric     return;
2047fe6060f1SDimitry Andric 
204861cfbce3SDimitry Andric   // Initialized to compatibilityVersion for the symbolName branch below.
204961cfbce3SDimitry Andric   uint32_t newCompatibilityVersion = compatibilityVersion;
205061cfbce3SDimitry Andric   uint32_t newCurrentVersionForSymbol = currentVersion;
2051fe6060f1SDimitry Andric   if (!compatVersion.empty()) {
2052fe6060f1SDimitry Andric     VersionTuple cVersion;
2053fe6060f1SDimitry Andric     if (cVersion.tryParse(compatVersion)) {
2054bdd1243dSDimitry Andric       warn(toString(this) +
2055bdd1243dSDimitry Andric            ": failed to parse compatibility version, symbol '" + originalName +
2056fe6060f1SDimitry Andric            "' ignored");
2057fe6060f1SDimitry Andric       return;
2058fe6060f1SDimitry Andric     }
205961cfbce3SDimitry Andric     newCompatibilityVersion = encodeVersion(cVersion);
206061cfbce3SDimitry Andric     newCurrentVersionForSymbol = newCompatibilityVersion;
2061fe6060f1SDimitry Andric   }
206261cfbce3SDimitry Andric 
206361cfbce3SDimitry Andric   if (!symbolName.empty()) {
206461cfbce3SDimitry Andric     // A $ld$previous$ symbol with symbol name adds a symbol with that name to
206561cfbce3SDimitry Andric     // a dylib with given name and version.
206661cfbce3SDimitry Andric     auto *dylib = getSyntheticDylib(installName, newCurrentVersionForSymbol,
206761cfbce3SDimitry Andric                                     newCompatibilityVersion);
206861cfbce3SDimitry Andric 
2069bdd1243dSDimitry Andric     // The tbd file usually contains the $ld$previous symbol for an old version,
2070bdd1243dSDimitry Andric     // and then the symbol itself later, for newer deployment targets, like so:
2071bdd1243dSDimitry Andric     //    symbols: [
2072bdd1243dSDimitry Andric     //      '$ld$previous$/Another$$1$3.0$14.0$_zzz$',
2073bdd1243dSDimitry Andric     //      _zzz,
2074bdd1243dSDimitry Andric     //    ]
2075bdd1243dSDimitry Andric     // Since the symbols are sorted, adding them to the symtab in the given
2076bdd1243dSDimitry Andric     // order means the $ld$previous version of _zzz will prevail, as desired.
207761cfbce3SDimitry Andric     dylib->symbols.push_back(symtab->addDylib(
207861cfbce3SDimitry Andric         saver().save(symbolName), dylib, /*isWeakDef=*/false, /*isTlv=*/false));
207961cfbce3SDimitry Andric     return;
208061cfbce3SDimitry Andric   }
208161cfbce3SDimitry Andric 
208261cfbce3SDimitry Andric   // A $ld$previous$ symbol without symbol name modifies the dylib it's in.
208361cfbce3SDimitry Andric   this->installName = saver().save(installName);
208461cfbce3SDimitry Andric   this->compatibilityVersion = newCompatibilityVersion;
2085fe6060f1SDimitry Andric }
2086fe6060f1SDimitry Andric 
handleLDInstallNameSymbol(StringRef name,StringRef originalName)2087fe6060f1SDimitry Andric void DylibFile::handleLDInstallNameSymbol(StringRef name,
2088fe6060f1SDimitry Andric                                           StringRef originalName) {
2089fe6060f1SDimitry Andric   // originalName: $ld$ install_name $ os<version> $ install_name
2090fe6060f1SDimitry Andric   StringRef condition, installName;
2091fe6060f1SDimitry Andric   std::tie(condition, installName) = name.split('$');
2092fe6060f1SDimitry Andric   VersionTuple version;
2093fe6060f1SDimitry Andric   if (!condition.consume_front("os") || version.tryParse(condition))
2094bdd1243dSDimitry Andric     warn(toString(this) + ": failed to parse os version, symbol '" +
2095bdd1243dSDimitry Andric          originalName + "' ignored");
209606c3fb27SDimitry Andric   else if (version == config->platformInfo.target.MinDeployment)
209704eeddc0SDimitry Andric     this->installName = saver().save(installName);
2098fe6060f1SDimitry Andric }
2099fe6060f1SDimitry Andric 
handleLDHideSymbol(StringRef name,StringRef originalName)21000eae32dcSDimitry Andric void DylibFile::handleLDHideSymbol(StringRef name, StringRef originalName) {
21010eae32dcSDimitry Andric   StringRef symbolName;
21020eae32dcSDimitry Andric   bool shouldHide = true;
210306c3fb27SDimitry Andric   if (name.starts_with("os")) {
21040eae32dcSDimitry Andric     // If it's hidden based on versions.
21050eae32dcSDimitry Andric     name = name.drop_front(2);
21060eae32dcSDimitry Andric     StringRef minVersion;
21070eae32dcSDimitry Andric     std::tie(minVersion, symbolName) = name.split('$');
21080eae32dcSDimitry Andric     VersionTuple versionTup;
21090eae32dcSDimitry Andric     if (versionTup.tryParse(minVersion)) {
2110bdd1243dSDimitry Andric       warn(toString(this) + ": failed to parse hidden version, symbol `" + originalName +
21110eae32dcSDimitry Andric            "` ignored.");
21120eae32dcSDimitry Andric       return;
21130eae32dcSDimitry Andric     }
211406c3fb27SDimitry Andric     shouldHide = versionTup == config->platformInfo.target.MinDeployment;
21150eae32dcSDimitry Andric   } else {
21160eae32dcSDimitry Andric     symbolName = name;
21170eae32dcSDimitry Andric   }
21180eae32dcSDimitry Andric 
21190eae32dcSDimitry Andric   if (shouldHide)
21200eae32dcSDimitry Andric     exportingFile->hiddenSymbols.insert(CachedHashStringRef(symbolName));
21210eae32dcSDimitry Andric }
21220eae32dcSDimitry Andric 
checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const2123fe6060f1SDimitry Andric void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
2124fe6060f1SDimitry Andric   if (config->applicationExtension && !dylibIsAppExtensionSafe)
2125fe6060f1SDimitry Andric     warn("using '-application_extension' with unsafe dylib: " + toString(this));
2126e8d8bef9SDimitry Andric }
2127e8d8bef9SDimitry Andric 
ArchiveFile(std::unique_ptr<object::Archive> && f,bool forceHidden)2128972a253aSDimitry Andric ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
2129972a253aSDimitry Andric     : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)),
2130972a253aSDimitry Andric       forceHidden(forceHidden) {}
2131349cc55cSDimitry Andric 
addLazySymbols()2132349cc55cSDimitry Andric void ArchiveFile::addLazySymbols() {
2133*5f757f3fSDimitry Andric   // Avoid calling getMemoryBufferRef() on zero-symbol archive
2134*5f757f3fSDimitry Andric   // since that crashes.
2135*5f757f3fSDimitry Andric   if (file->isEmpty() || file->getNumberOfSymbols() == 0)
2136*5f757f3fSDimitry Andric     return;
2137*5f757f3fSDimitry Andric 
2138*5f757f3fSDimitry Andric   Error err = Error::success();
2139*5f757f3fSDimitry Andric   auto child = file->child_begin(err);
2140*5f757f3fSDimitry Andric   // Ignore the I/O error here - will be reported later.
2141*5f757f3fSDimitry Andric   if (!err) {
2142*5f757f3fSDimitry Andric     Expected<MemoryBufferRef> mbOrErr = child->getMemoryBufferRef();
2143*5f757f3fSDimitry Andric     if (!mbOrErr) {
2144*5f757f3fSDimitry Andric       llvm::consumeError(mbOrErr.takeError());
2145*5f757f3fSDimitry Andric     } else {
2146*5f757f3fSDimitry Andric       if (identify_magic(mbOrErr->getBuffer()) == file_magic::macho_object) {
2147*5f757f3fSDimitry Andric         if (target->wordSize == 8)
2148*5f757f3fSDimitry Andric           compatArch = compatWithTargetArch(
2149*5f757f3fSDimitry Andric               this, reinterpret_cast<const LP64::mach_header *>(
2150*5f757f3fSDimitry Andric                         mbOrErr->getBufferStart()));
2151*5f757f3fSDimitry Andric         else
2152*5f757f3fSDimitry Andric           compatArch = compatWithTargetArch(
2153*5f757f3fSDimitry Andric               this, reinterpret_cast<const ILP32::mach_header *>(
2154*5f757f3fSDimitry Andric                         mbOrErr->getBufferStart()));
2155*5f757f3fSDimitry Andric         if (!compatArch)
2156*5f757f3fSDimitry Andric           return;
2157*5f757f3fSDimitry Andric       }
2158*5f757f3fSDimitry Andric     }
2159*5f757f3fSDimitry Andric   }
2160*5f757f3fSDimitry Andric 
21615ffd83dbSDimitry Andric   for (const object::Archive::Symbol &sym : file->symbols())
216204eeddc0SDimitry Andric     symtab->addLazyArchive(sym.getName(), this, sym);
21635ffd83dbSDimitry Andric }
21645ffd83dbSDimitry Andric 
2165972a253aSDimitry Andric static Expected<InputFile *>
loadArchiveMember(MemoryBufferRef mb,uint32_t modTime,StringRef archiveName,uint64_t offsetInArchive,bool forceHidden,bool compatArch)2166972a253aSDimitry Andric loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
2167*5f757f3fSDimitry Andric                   uint64_t offsetInArchive, bool forceHidden, bool compatArch) {
2168349cc55cSDimitry Andric   if (config->zeroModTime)
2169349cc55cSDimitry Andric     modTime = 0;
2170349cc55cSDimitry Andric 
2171349cc55cSDimitry Andric   switch (identify_magic(mb.getBuffer())) {
2172349cc55cSDimitry Andric   case file_magic::macho_object:
2173*5f757f3fSDimitry Andric     return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden,
2174*5f757f3fSDimitry Andric                          compatArch);
2175349cc55cSDimitry Andric   case file_magic::bitcode:
2176972a253aSDimitry Andric     return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false,
2177*5f757f3fSDimitry Andric                              forceHidden, compatArch);
2178349cc55cSDimitry Andric   default:
2179349cc55cSDimitry Andric     return createStringError(inconvertibleErrorCode(),
2180349cc55cSDimitry Andric                              mb.getBufferIdentifier() +
2181349cc55cSDimitry Andric                                  " has unhandled file type");
2182349cc55cSDimitry Andric   }
2183349cc55cSDimitry Andric }
2184349cc55cSDimitry Andric 
fetch(const object::Archive::Child & c,StringRef reason)2185349cc55cSDimitry Andric Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
2186349cc55cSDimitry Andric   if (!seen.insert(c.getChildOffset()).second)
2187349cc55cSDimitry Andric     return Error::success();
2188349cc55cSDimitry Andric 
2189349cc55cSDimitry Andric   Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
2190349cc55cSDimitry Andric   if (!mb)
2191349cc55cSDimitry Andric     return mb.takeError();
2192349cc55cSDimitry Andric 
2193349cc55cSDimitry Andric   // Thin archives refer to .o files, so --reproduce needs the .o files too.
2194349cc55cSDimitry Andric   if (tar && c.getParent()->isThin())
2195349cc55cSDimitry Andric     tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb->getBuffer());
2196349cc55cSDimitry Andric 
2197349cc55cSDimitry Andric   Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
2198349cc55cSDimitry Andric   if (!modTime)
2199349cc55cSDimitry Andric     return modTime.takeError();
2200349cc55cSDimitry Andric 
2201*5f757f3fSDimitry Andric   Expected<InputFile *> file =
2202*5f757f3fSDimitry Andric       loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
2203*5f757f3fSDimitry Andric                         forceHidden, compatArch);
2204349cc55cSDimitry Andric 
2205349cc55cSDimitry Andric   if (!file)
2206349cc55cSDimitry Andric     return file.takeError();
2207349cc55cSDimitry Andric 
2208349cc55cSDimitry Andric   inputFiles.insert(*file);
2209349cc55cSDimitry Andric   printArchiveMemberLoad(reason, *file);
2210349cc55cSDimitry Andric   return Error::success();
2211349cc55cSDimitry Andric }
2212349cc55cSDimitry Andric 
fetch(const object::Archive::Symbol & sym)22135ffd83dbSDimitry Andric void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
22145ffd83dbSDimitry Andric   object::Archive::Child c =
22155ffd83dbSDimitry Andric       CHECK(sym.getMember(), toString(this) +
2216349cc55cSDimitry Andric                                  ": could not get the member defining symbol " +
2217e8d8bef9SDimitry Andric                                  toMachOString(sym));
22185ffd83dbSDimitry Andric 
2219fe6060f1SDimitry Andric   // `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
2220e8d8bef9SDimitry Andric   // and become invalid after that call. Copy it to the stack so we can refer
2221e8d8bef9SDimitry Andric   // to it later.
2222fe6060f1SDimitry Andric   const object::Archive::Symbol symCopy = sym;
2223e8d8bef9SDimitry Andric 
2224fe6060f1SDimitry Andric   // ld64 doesn't demangle sym here even with -demangle.
2225fe6060f1SDimitry Andric   // Match that: intentionally don't call toMachOString().
2226349cc55cSDimitry Andric   if (Error e = fetch(c, symCopy.getName()))
2227349cc55cSDimitry Andric     error(toString(this) + ": could not get the member defining symbol " +
2228349cc55cSDimitry Andric           toMachOString(symCopy) + ": " + toString(std::move(e)));
22295ffd83dbSDimitry Andric }
22305ffd83dbSDimitry Andric 
createBitcodeSymbol(const lto::InputFile::Symbol & objSym,BitcodeFile & file)2231fe6060f1SDimitry Andric static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
2232fe6060f1SDimitry Andric                                           BitcodeFile &file) {
223304eeddc0SDimitry Andric   StringRef name = saver().save(objSym.getName());
2234fe6060f1SDimitry Andric 
2235fe6060f1SDimitry Andric   if (objSym.isUndefined())
22360eae32dcSDimitry Andric     return symtab->addUndefined(name, &file, /*isWeakRef=*/objSym.isWeak());
2237fe6060f1SDimitry Andric 
2238fe6060f1SDimitry Andric   // TODO: Write a test demonstrating why computing isPrivateExtern before
2239fe6060f1SDimitry Andric   // LTO compilation is important.
2240fe6060f1SDimitry Andric   bool isPrivateExtern = false;
2241fe6060f1SDimitry Andric   switch (objSym.getVisibility()) {
2242fe6060f1SDimitry Andric   case GlobalValue::HiddenVisibility:
2243fe6060f1SDimitry Andric     isPrivateExtern = true;
2244fe6060f1SDimitry Andric     break;
2245fe6060f1SDimitry Andric   case GlobalValue::ProtectedVisibility:
2246fe6060f1SDimitry Andric     error(name + " has protected visibility, which is not supported by Mach-O");
2247fe6060f1SDimitry Andric     break;
2248fe6060f1SDimitry Andric   case GlobalValue::DefaultVisibility:
2249fe6060f1SDimitry Andric     break;
2250fe6060f1SDimitry Andric   }
2251972a253aSDimitry Andric   isPrivateExtern = isPrivateExtern || objSym.canBeOmittedFromSymbolTable() ||
2252972a253aSDimitry Andric                     file.forceHidden;
2253fe6060f1SDimitry Andric 
2254349cc55cSDimitry Andric   if (objSym.isCommon())
2255349cc55cSDimitry Andric     return symtab->addCommon(name, &file, objSym.getCommonSize(),
2256349cc55cSDimitry Andric                              objSym.getCommonAlignment(), isPrivateExtern);
2257349cc55cSDimitry Andric 
2258fe6060f1SDimitry Andric   return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0,
2259fe6060f1SDimitry Andric                             /*size=*/0, objSym.isWeak(), isPrivateExtern,
2260fe6060f1SDimitry Andric                             /*isReferencedDynamically=*/false,
2261349cc55cSDimitry Andric                             /*noDeadStrip=*/false,
2262349cc55cSDimitry Andric                             /*isWeakDefCanBeHidden=*/false);
2263fe6060f1SDimitry Andric }
2264fe6060f1SDimitry Andric 
BitcodeFile(MemoryBufferRef mb,StringRef archiveName,uint64_t offsetInArchive,bool lazy,bool forceHidden,bool compatArch)2265fe6060f1SDimitry Andric BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
2266*5f757f3fSDimitry Andric                          uint64_t offsetInArchive, bool lazy, bool forceHidden,
2267*5f757f3fSDimitry Andric                          bool compatArch)
2268972a253aSDimitry Andric     : InputFile(BitcodeKind, mb, lazy), forceHidden(forceHidden) {
22690eae32dcSDimitry Andric   this->archiveName = std::string(archiveName);
2270*5f757f3fSDimitry Andric   this->compatArch = compatArch;
2271fe6060f1SDimitry Andric   std::string path = mb.getBufferIdentifier().str();
2272bdd1243dSDimitry Andric   if (config->thinLTOIndexOnly)
2273bdd1243dSDimitry Andric     path = replaceThinLTOSuffix(mb.getBufferIdentifier());
2274bdd1243dSDimitry Andric 
2275*5f757f3fSDimitry Andric   // If the parent archive already determines that the arch is not compat with
2276*5f757f3fSDimitry Andric   // target, then just return.
2277*5f757f3fSDimitry Andric   if (!compatArch)
2278*5f757f3fSDimitry Andric     return;
2279*5f757f3fSDimitry Andric 
2280fe6060f1SDimitry Andric   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
2281fe6060f1SDimitry Andric   // name. If two members with the same name are provided, this causes a
2282fe6060f1SDimitry Andric   // collision and ThinLTO can't proceed.
2283fe6060f1SDimitry Andric   // So, we append the archive name to disambiguate two members with the same
2284fe6060f1SDimitry Andric   // name from multiple different archives, and offset within the archive to
2285fe6060f1SDimitry Andric   // disambiguate two members of the same name from a single archive.
228604eeddc0SDimitry Andric   MemoryBufferRef mbref(mb.getBuffer(),
228704eeddc0SDimitry Andric                         saver().save(archiveName.empty()
228804eeddc0SDimitry Andric                                          ? path
228906c3fb27SDimitry Andric                                          : archiveName + "(" +
229006c3fb27SDimitry Andric                                                sys::path::filename(path) + ")" +
2291fe6060f1SDimitry Andric                                                utostr(offsetInArchive)));
2292e8d8bef9SDimitry Andric   obj = check(lto::InputFile::create(mbref));
229304eeddc0SDimitry Andric   if (lazy)
229404eeddc0SDimitry Andric     parseLazy();
229504eeddc0SDimitry Andric   else
229604eeddc0SDimitry Andric     parse();
229704eeddc0SDimitry Andric }
2298fe6060f1SDimitry Andric 
parse()229904eeddc0SDimitry Andric void BitcodeFile::parse() {
2300fe6060f1SDimitry Andric   // Convert LTO Symbols to LLD Symbols in order to perform resolution. The
2301fe6060f1SDimitry Andric   // "winning" symbol will then be marked as Prevailing at LTO compilation
2302fe6060f1SDimitry Andric   // time.
2303*5f757f3fSDimitry Andric   symbols.resize(obj->symbols().size());
2304*5f757f3fSDimitry Andric 
2305*5f757f3fSDimitry Andric   // Process defined symbols first. See the comment at the end of
2306*5f757f3fSDimitry Andric   // ObjFile<>::parseSymbols.
2307*5f757f3fSDimitry Andric   for (auto it : llvm::enumerate(obj->symbols()))
2308*5f757f3fSDimitry Andric     if (!it.value().isUndefined())
2309*5f757f3fSDimitry Andric       symbols[it.index()] = createBitcodeSymbol(it.value(), *this);
2310*5f757f3fSDimitry Andric   for (auto it : llvm::enumerate(obj->symbols()))
2311*5f757f3fSDimitry Andric     if (it.value().isUndefined())
2312*5f757f3fSDimitry Andric       symbols[it.index()] = createBitcodeSymbol(it.value(), *this);
23135ffd83dbSDimitry Andric }
2314fe6060f1SDimitry Andric 
parseLazy()231504eeddc0SDimitry Andric void BitcodeFile::parseLazy() {
231604eeddc0SDimitry Andric   symbols.resize(obj->symbols().size());
2317bdd1243dSDimitry Andric   for (const auto &[i, objSym] : llvm::enumerate(obj->symbols())) {
231804eeddc0SDimitry Andric     if (!objSym.isUndefined()) {
2319bdd1243dSDimitry Andric       symbols[i] = symtab->addLazyObject(saver().save(objSym.getName()), *this);
232004eeddc0SDimitry Andric       if (!lazy)
232104eeddc0SDimitry Andric         break;
232204eeddc0SDimitry Andric     }
232304eeddc0SDimitry Andric   }
232404eeddc0SDimitry Andric }
232504eeddc0SDimitry Andric 
replaceThinLTOSuffix(StringRef path)2326bdd1243dSDimitry Andric std::string macho::replaceThinLTOSuffix(StringRef path) {
2327bdd1243dSDimitry Andric   auto [suffix, repl] = config->thinLTOObjectSuffixReplace;
2328bdd1243dSDimitry Andric   if (path.consume_back(suffix))
2329bdd1243dSDimitry Andric     return (path + repl).str();
2330bdd1243dSDimitry Andric   return std::string(path);
2331bdd1243dSDimitry Andric }
2332bdd1243dSDimitry Andric 
extract(InputFile & file,StringRef reason)233304eeddc0SDimitry Andric void macho::extract(InputFile &file, StringRef reason) {
2334bdd1243dSDimitry Andric   if (!file.lazy)
2335bdd1243dSDimitry Andric     return;
233604eeddc0SDimitry Andric   file.lazy = false;
2337bdd1243dSDimitry Andric 
233804eeddc0SDimitry Andric   printArchiveMemberLoad(reason, &file);
233904eeddc0SDimitry Andric   if (auto *bitcode = dyn_cast<BitcodeFile>(&file)) {
234004eeddc0SDimitry Andric     bitcode->parse();
234104eeddc0SDimitry Andric   } else {
234204eeddc0SDimitry Andric     auto &f = cast<ObjFile>(file);
234304eeddc0SDimitry Andric     if (target->wordSize == 8)
234404eeddc0SDimitry Andric       f.parse<LP64>();
234504eeddc0SDimitry Andric     else
234604eeddc0SDimitry Andric       f.parse<ILP32>();
234704eeddc0SDimitry Andric   }
234804eeddc0SDimitry Andric }
234904eeddc0SDimitry Andric 
// Explicit instantiation of ObjFile::parse for the LP64 layout, so that the
// definition of this specialization is emitted in this translation unit.
// NOTE(review): presumably needed by callers outside this file -- confirm.
template void ObjFile::parse<LP64>();
2351