1 //===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_READER_WRITER_MACHO_FILE_H
10 #define LLD_READER_WRITER_MACHO_FILE_H
11 
12 #include "Atoms.h"
13 #include "DebugInfo.h"
14 #include "MachONormalizedFile.h"
15 #include "lld/Core/SharedLibraryFile.h"
16 #include "lld/Core/Simple.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/StringMap.h"
19 #include "llvm/Support/Format.h"
20 #include "llvm/TextAPI/MachO/InterfaceFile.h"
21 #include "llvm/TextAPI/MachO/TextAPIReader.h"
22 #include <unordered_map>
23 
24 namespace lld {
25 namespace mach_o {
26 
27 using lld::mach_o::normalized::Section;
28 
29 class MachOFile : public SimpleFile {
30 public:
31 
32   /// Real file constructor - for on-disk files.
MachOFile(std::unique_ptr<MemoryBuffer> mb,MachOLinkingContext * ctx)33   MachOFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx)
34     : SimpleFile(mb->getBufferIdentifier(), File::kindMachObject),
35       _mb(std::move(mb)), _ctx(ctx) {}
36 
37   /// Dummy file constructor - for virtual files.
MachOFile(StringRef path)38   MachOFile(StringRef path)
39     : SimpleFile(path, File::kindMachObject) {}
40 
addDefinedAtom(StringRef name,Atom::Scope scope,DefinedAtom::ContentType type,DefinedAtom::Merge merge,uint64_t sectionOffset,uint64_t contentSize,bool thumb,bool noDeadStrip,bool copyRefs,const Section * inSection)41   void addDefinedAtom(StringRef name, Atom::Scope scope,
42                       DefinedAtom::ContentType type, DefinedAtom::Merge merge,
43                       uint64_t sectionOffset, uint64_t contentSize, bool thumb,
44                       bool noDeadStrip, bool copyRefs,
45                       const Section *inSection) {
46     assert(sectionOffset+contentSize <= inSection->content.size());
47     ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset,
48                                                         contentSize);
49     if (copyRefs) {
50       // Make a copy of the atom's name and content that is owned by this file.
51       name = name.copy(allocator());
52       content = content.copy(allocator());
53     }
54     DefinedAtom::Alignment align(
55         inSection->alignment,
56         sectionOffset % inSection->alignment);
57     auto *atom =
58         new (allocator()) MachODefinedAtom(*this, name, scope, type, merge,
59                                            thumb, noDeadStrip, content, align);
60     addAtomForSection(inSection, atom, sectionOffset);
61   }
62 
addDefinedAtomInCustomSection(StringRef name,Atom::Scope scope,DefinedAtom::ContentType type,DefinedAtom::Merge merge,bool thumb,bool noDeadStrip,uint64_t sectionOffset,uint64_t contentSize,StringRef sectionName,bool copyRefs,const Section * inSection)63   void addDefinedAtomInCustomSection(StringRef name, Atom::Scope scope,
64                       DefinedAtom::ContentType type, DefinedAtom::Merge merge,
65                       bool thumb, bool noDeadStrip, uint64_t sectionOffset,
66                       uint64_t contentSize, StringRef sectionName,
67                       bool copyRefs, const Section *inSection) {
68     assert(sectionOffset+contentSize <= inSection->content.size());
69     ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset,
70                                                         contentSize);
71    if (copyRefs) {
72       // Make a copy of the atom's name and content that is owned by this file.
73       name = name.copy(allocator());
74       content = content.copy(allocator());
75       sectionName = sectionName.copy(allocator());
76     }
77     DefinedAtom::Alignment align(
78         inSection->alignment,
79         sectionOffset % inSection->alignment);
80     auto *atom =
81         new (allocator()) MachODefinedCustomSectionAtom(*this, name, scope, type,
82                                                         merge, thumb,
83                                                         noDeadStrip, content,
84                                                         sectionName, align);
85     addAtomForSection(inSection, atom, sectionOffset);
86   }
87 
addZeroFillDefinedAtom(StringRef name,Atom::Scope scope,uint64_t sectionOffset,uint64_t size,bool noDeadStrip,bool copyRefs,const Section * inSection)88   void addZeroFillDefinedAtom(StringRef name, Atom::Scope scope,
89                               uint64_t sectionOffset, uint64_t size,
90                               bool noDeadStrip, bool copyRefs,
91                               const Section *inSection) {
92     if (copyRefs) {
93       // Make a copy of the atom's name and content that is owned by this file.
94       name = name.copy(allocator());
95     }
96     DefinedAtom::Alignment align(
97         inSection->alignment,
98         sectionOffset % inSection->alignment);
99 
100     DefinedAtom::ContentType type = DefinedAtom::typeUnknown;
101     switch (inSection->type) {
102     case llvm::MachO::S_ZEROFILL:
103       type = DefinedAtom::typeZeroFill;
104       break;
105     case llvm::MachO::S_THREAD_LOCAL_ZEROFILL:
106       type = DefinedAtom::typeTLVInitialZeroFill;
107       break;
108     default:
109       llvm_unreachable("Unrecognized zero-fill section");
110     }
111 
112     auto *atom =
113         new (allocator()) MachODefinedAtom(*this, name, scope, type, size,
114                                            noDeadStrip, align);
115     addAtomForSection(inSection, atom, sectionOffset);
116   }
117 
addUndefinedAtom(StringRef name,bool copyRefs)118   void addUndefinedAtom(StringRef name, bool copyRefs) {
119     if (copyRefs) {
120       // Make a copy of the atom's name that is owned by this file.
121       name = name.copy(allocator());
122     }
123     auto *atom = new (allocator()) SimpleUndefinedAtom(*this, name);
124     addAtom(*atom);
125     _undefAtoms[name] = atom;
126   }
127 
addTentativeDefAtom(StringRef name,Atom::Scope scope,uint64_t size,DefinedAtom::Alignment align,bool copyRefs)128   void addTentativeDefAtom(StringRef name, Atom::Scope scope, uint64_t size,
129                            DefinedAtom::Alignment align, bool copyRefs) {
130     if (copyRefs) {
131       // Make a copy of the atom's name that is owned by this file.
132       name = name.copy(allocator());
133     }
134     auto *atom =
135         new (allocator()) MachOTentativeDefAtom(*this, name, scope, size, align);
136     addAtom(*atom);
137     _undefAtoms[name] = atom;
138   }
139 
140   /// Search this file for the atom from 'section' that covers
141   /// 'offsetInSect'.  Returns nullptr is no atom found.
142   MachODefinedAtom *findAtomCoveringAddress(const Section &section,
143                                             uint64_t offsetInSect,
144                                             uint32_t *foundOffsetAtom=nullptr) {
145     const auto &pos = _sectionAtoms.find(&section);
146     if (pos == _sectionAtoms.end())
147       return nullptr;
148     const auto &vec = pos->second;
149     assert(offsetInSect < section.content.size());
150     // Vector of atoms for section are already sorted, so do binary search.
151     const auto &atomPos = std::lower_bound(vec.begin(), vec.end(), offsetInSect,
152         [offsetInSect](const SectionOffsetAndAtom &ao,
153                        uint64_t targetAddr) -> bool {
154           // Each atom has a start offset of its slice of the
155           // section's content. This compare function must return true
156           // iff the atom's range is before the offset being searched for.
157           uint64_t atomsEndOffset = ao.offset+ao.atom->rawContent().size();
158           return (atomsEndOffset <= offsetInSect);
159         });
160     if (atomPos == vec.end())
161       return nullptr;
162     if (foundOffsetAtom)
163       *foundOffsetAtom = offsetInSect - atomPos->offset;
164     return atomPos->atom;
165   }
166 
167   /// Searches this file for an UndefinedAtom named 'name'. Returns
168   /// nullptr is no such atom found.
findUndefAtom(StringRef name)169   const lld::Atom *findUndefAtom(StringRef name) {
170     auto pos = _undefAtoms.find(name);
171     if (pos == _undefAtoms.end())
172       return nullptr;
173     return pos->second;
174   }
175 
176   typedef std::function<void (MachODefinedAtom* atom)> DefinedAtomVisitor;
177 
eachDefinedAtom(DefinedAtomVisitor vistor)178   void eachDefinedAtom(DefinedAtomVisitor vistor) {
179     for (auto &sectAndAtoms : _sectionAtoms) {
180       for (auto &offAndAtom : sectAndAtoms.second) {
181         vistor(offAndAtom.atom);
182       }
183     }
184   }
185 
186   typedef std::function<void(MachODefinedAtom *atom, uint64_t offset)>
187       SectionAtomVisitor;
188 
eachAtomInSection(const Section & section,SectionAtomVisitor visitor)189   void eachAtomInSection(const Section &section, SectionAtomVisitor visitor) {
190     auto pos = _sectionAtoms.find(&section);
191     if (pos == _sectionAtoms.end())
192       return;
193     auto vec = pos->second;
194 
195     for (auto &offAndAtom : vec)
196       visitor(offAndAtom.atom, offAndAtom.offset);
197   }
198 
arch()199   MachOLinkingContext::Arch arch() const { return _arch; }
setArch(MachOLinkingContext::Arch arch)200   void setArch(MachOLinkingContext::Arch arch) { _arch = arch; }
201 
OS()202   MachOLinkingContext::OS OS() const { return _os; }
setOS(MachOLinkingContext::OS os)203   void setOS(MachOLinkingContext::OS os) { _os = os; }
204 
objcConstraint()205   MachOLinkingContext::ObjCConstraint objcConstraint() const {
206     return _objcConstraint;
207   }
setObjcConstraint(MachOLinkingContext::ObjCConstraint v)208   void setObjcConstraint(MachOLinkingContext::ObjCConstraint v) {
209     _objcConstraint = v;
210   }
211 
minVersion()212   uint32_t minVersion() const { return _minVersion; }
setMinVersion(uint32_t v)213   void setMinVersion(uint32_t v) { _minVersion = v; }
214 
minVersionLoadCommandKind()215   LoadCommandType minVersionLoadCommandKind() const {
216     return _minVersionLoadCommandKind;
217   }
setMinVersionLoadCommandKind(LoadCommandType v)218   void setMinVersionLoadCommandKind(LoadCommandType v) {
219     _minVersionLoadCommandKind = v;
220   }
221 
swiftVersion()222   uint32_t swiftVersion() const { return _swiftVersion; }
setSwiftVersion(uint32_t v)223   void setSwiftVersion(uint32_t v) { _swiftVersion = v; }
224 
subsectionsViaSymbols()225   bool subsectionsViaSymbols() const {
226     return _flags & llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
227   }
setFlags(normalized::FileFlags v)228   void setFlags(normalized::FileFlags v) { _flags = v; }
229 
230   /// Methods for support type inquiry through isa, cast, and dyn_cast:
classof(const File * F)231   static inline bool classof(const File *F) {
232     return F->kind() == File::kindMachObject;
233   }
234 
setDebugInfo(std::unique_ptr<DebugInfo> debugInfo)235   void setDebugInfo(std::unique_ptr<DebugInfo> debugInfo) {
236     _debugInfo = std::move(debugInfo);
237   }
238 
debugInfo()239   DebugInfo* debugInfo() const { return _debugInfo.get(); }
takeDebugInfo()240   std::unique_ptr<DebugInfo> takeDebugInfo() { return std::move(_debugInfo); }
241 
242 protected:
doParse()243   std::error_code doParse() override {
244     // Convert binary file to normalized mach-o.
245     auto normFile = normalized::readBinary(_mb, _ctx->arch());
246     if (auto ec = normFile.takeError())
247       return llvm::errorToErrorCode(std::move(ec));
248     // Convert normalized mach-o to atoms.
249     if (auto ec = normalized::normalizedObjectToAtoms(this, **normFile, false))
250       return llvm::errorToErrorCode(std::move(ec));
251     return std::error_code();
252   }
253 
254 private:
255   struct SectionOffsetAndAtom { uint64_t offset;  MachODefinedAtom *atom; };
256 
addAtomForSection(const Section * inSection,MachODefinedAtom * atom,uint64_t sectionOffset)257   void addAtomForSection(const Section *inSection, MachODefinedAtom* atom,
258                          uint64_t sectionOffset) {
259     SectionOffsetAndAtom offAndAtom;
260     offAndAtom.offset = sectionOffset;
261     offAndAtom.atom   = atom;
262      _sectionAtoms[inSection].push_back(offAndAtom);
263     addAtom(*atom);
264   }
265 
266   typedef llvm::DenseMap<const normalized::Section *,
267                          std::vector<SectionOffsetAndAtom>>  SectionToAtoms;
268   typedef llvm::StringMap<const lld::Atom *> NameToAtom;
269 
270   std::unique_ptr<MemoryBuffer> _mb;
271   MachOLinkingContext          *_ctx;
272   SectionToAtoms                _sectionAtoms;
273   NameToAtom                     _undefAtoms;
274   MachOLinkingContext::Arch      _arch = MachOLinkingContext::arch_unknown;
275   MachOLinkingContext::OS        _os = MachOLinkingContext::OS::unknown;
276   uint32_t                       _minVersion = 0;
277   LoadCommandType               _minVersionLoadCommandKind = (LoadCommandType)0;
278   MachOLinkingContext::ObjCConstraint _objcConstraint =
279       MachOLinkingContext::objc_unknown;
280   uint32_t                       _swiftVersion = 0;
281   normalized::FileFlags        _flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
282   std::unique_ptr<DebugInfo>   _debugInfo;
283 };
284 
285 class MachODylibFile : public SharedLibraryFile {
286 public:
MachODylibFile(std::unique_ptr<MemoryBuffer> mb,MachOLinkingContext * ctx)287   MachODylibFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx)
288       : SharedLibraryFile(mb->getBufferIdentifier()),
289         _mb(std::move(mb)), _ctx(ctx) {}
290 
MachODylibFile(StringRef path)291   MachODylibFile(StringRef path) : SharedLibraryFile(path) {}
292 
exports(StringRef name)293   OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const override {
294     // Pass down _installName so that if this requested symbol
295     // is re-exported through this dylib, the SharedLibraryAtom's loadName()
296     // is this dylib installName and not the implementation dylib's.
297     // NOTE: isData is not needed for dylibs (it matters for static libs).
298     return exports(name, _installName);
299   }
300 
301   /// Adds symbol name that this dylib exports. The corresponding
302   /// SharedLibraryAtom is created lazily (since most symbols are not used).
addExportedSymbol(StringRef name,bool weakDef,bool copyRefs)303   void addExportedSymbol(StringRef name, bool weakDef, bool copyRefs) {
304     if (copyRefs) {
305       name = name.copy(allocator());
306     }
307     AtomAndFlags info(weakDef);
308     _nameToAtom[name] = info;
309   }
310 
addReExportedDylib(StringRef dylibPath)311   void addReExportedDylib(StringRef dylibPath) {
312     _reExportedDylibs.emplace_back(dylibPath);
313   }
314 
installName()315   StringRef installName() const { return _installName; }
currentVersion()316   uint32_t currentVersion() { return _currentVersion; }
compatVersion()317   uint32_t compatVersion() { return _compatVersion; }
318 
setInstallName(StringRef name)319   void setInstallName(StringRef name) { _installName = name; }
setCompatVersion(uint32_t version)320   void setCompatVersion(uint32_t version) { _compatVersion = version; }
setCurrentVersion(uint32_t version)321   void setCurrentVersion(uint32_t version) { _currentVersion = version; }
322 
323   typedef std::function<MachODylibFile *(StringRef)> FindDylib;
324 
loadReExportedDylibs(FindDylib find)325   void loadReExportedDylibs(FindDylib find) {
326     for (ReExportedDylib &entry : _reExportedDylibs) {
327       if (!entry.file)
328         entry.file = find(entry.path);
329     }
330   }
331 
getDSOName()332   StringRef getDSOName() const override { return _installName; }
333 
doParse()334   std::error_code doParse() override {
335     // Convert binary file to normalized mach-o.
336     auto normFile = normalized::readBinary(_mb, _ctx->arch());
337     if (auto ec = normFile.takeError())
338       return llvm::errorToErrorCode(std::move(ec));
339     // Convert normalized mach-o to atoms.
340     if (auto ec = normalized::normalizedDylibToAtoms(this, **normFile, false))
341       return llvm::errorToErrorCode(std::move(ec));
342     return std::error_code();
343   }
344 
345 protected:
exports(StringRef name,StringRef installName)346   OwningAtomPtr<SharedLibraryAtom> exports(StringRef name,
347                                    StringRef installName) const {
348     // First, check if requested symbol is directly implemented by this dylib.
349     auto entry = _nameToAtom.find(name);
350     if (entry != _nameToAtom.end()) {
351       // FIXME: Make this map a set and only used in assert builds.
352       // Note, its safe to assert here as the resolver is the only client of
353       // this API and it only requests exports for undefined symbols.
354       // If we return from here we are no longer undefined so we should never
355       // get here again.
356       assert(!entry->second.atom && "Duplicate shared library export");
357       bool weakDef = entry->second.weakDef;
358       auto *atom = new (allocator()) MachOSharedLibraryAtom(*this, name,
359                                                             installName,
360                                                             weakDef);
361       entry->second.atom = atom;
362       return atom;
363     }
364 
365     // Next, check if symbol is implemented in some re-exported dylib.
366     for (const ReExportedDylib &dylib : _reExportedDylibs) {
367       assert(dylib.file);
368       auto atom = dylib.file->exports(name, installName);
369       if (atom.get())
370         return atom;
371     }
372 
373     // Symbol not exported or re-exported by this dylib.
374     return nullptr;
375   }
376 
377   struct ReExportedDylib {
ReExportedDylibReExportedDylib378     ReExportedDylib(StringRef p) : path(p), file(nullptr) { }
ReExportedDylibReExportedDylib379     ReExportedDylib(StringRef p, MachODylibFile *file) : path(p), file(file) { }
380     StringRef       path;
381     MachODylibFile *file;
382   };
383 
384   struct AtomAndFlags {
AtomAndFlagsAtomAndFlags385     AtomAndFlags() : atom(nullptr), weakDef(false) { }
AtomAndFlagsAtomAndFlags386     AtomAndFlags(bool weak) : atom(nullptr), weakDef(weak) { }
387     const SharedLibraryAtom  *atom;
388     bool                      weakDef;
389   };
390 
391   std::unique_ptr<MemoryBuffer>              _mb;
392   MachOLinkingContext                       *_ctx;
393   StringRef                                  _installName;
394   uint32_t                                   _currentVersion;
395   uint32_t                                   _compatVersion;
396   std::vector<ReExportedDylib>               _reExportedDylibs;
397   mutable std::unordered_map<StringRef, AtomAndFlags> _nameToAtom;
398 };
399 
400 class TAPIFile : public MachODylibFile {
401 public:
402 
TAPIFile(std::unique_ptr<MemoryBuffer> mb,MachOLinkingContext * ctx)403   TAPIFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx)
404       : MachODylibFile(std::move(mb), ctx) {}
405 
doParse()406   std::error_code doParse() override {
407 
408     llvm::Expected<std::unique_ptr<llvm::MachO::InterfaceFile>> result =
409         llvm::MachO::TextAPIReader::get(*_mb);
410     if (!result)
411       return std::make_error_code(std::errc::invalid_argument);
412 
413     std::unique_ptr<llvm::MachO::InterfaceFile> interface{std::move(*result)};
414     return loadFromInterface(*interface);
415   }
416 
417 private:
loadFromInterface(llvm::MachO::InterfaceFile & interface)418   std::error_code loadFromInterface(llvm::MachO::InterfaceFile &interface) {
419     llvm::MachO::Architecture arch;
420     switch(_ctx->arch()) {
421     case MachOLinkingContext::arch_x86:
422       arch = llvm::MachO::AK_i386;
423       break;
424     case MachOLinkingContext::arch_x86_64:
425       arch = llvm::MachO::AK_x86_64;
426       break;
427     case MachOLinkingContext::arch_arm64:
428       arch = llvm::MachO::AK_arm64;
429       break;
430     default:
431       return std::make_error_code(std::errc::invalid_argument);
432     }
433 
434     setInstallName(interface.getInstallName().copy(allocator()));
435     // TODO(compnerd) filter out symbols based on the target platform
436     for (const auto symbol : interface.symbols())
437       if (symbol->getArchitectures().has(arch))
438         addExportedSymbol(symbol->getName(), symbol->isWeakDefined(), true);
439 
440     for (const llvm::MachO::InterfaceFileRef &reexport :
441          interface.reexportedLibraries())
442       addReExportedDylib(reexport.getInstallName().copy(allocator()));
443 
444     for (const auto& document : interface.documents()) {
445       for (auto& reexport : _reExportedDylibs) {
446         if (reexport.path != document->getInstallName())
447           continue;
448         assert(!reexport.file);
449         _ownedFiles.push_back(std::make_unique<TAPIFile>(
450             MemoryBuffer::getMemBuffer("", _mb->getBufferIdentifier()), _ctx));
451         reexport.file = _ownedFiles.back().get();
452         std::error_code err = _ownedFiles.back()->loadFromInterface(*document);
453         if (err)
454           return err;
455       }
456     }
457 
458     return std::error_code();
459   }
460 
461   std::vector<std::unique_ptr<TAPIFile>> _ownedFiles;
462 };
463 
464 } // end namespace mach_o
465 } // end namespace lld
466 
467 #endif // LLD_READER_WRITER_MACHO_FILE_H
468