1 //=--------- MachOLinkGraphBuilder.cpp - MachO LinkGraph builder ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Generic MachO LinkGraph building code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MachOLinkGraphBuilder.h"
14 
15 #define DEBUG_TYPE "jitlink"
16 
// Name given to the graph section that getCommonSection() lazily creates to
// hold common symbols.
static const char *CommonSectionName = "__common";
18 
19 namespace llvm {
20 namespace jitlink {
21 
22 MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {}
23 
24 Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
25 
26   // Sanity check: we only operate on relocatable objects.
27   if (!Obj.isRelocatableObject())
28     return make_error<JITLinkError>("Object is not a relocatable MachO");
29 
30   if (auto Err = createNormalizedSections())
31     return std::move(Err);
32 
33   if (auto Err = createNormalizedSymbols())
34     return std::move(Err);
35 
36   if (auto Err = graphifyRegularSymbols())
37     return std::move(Err);
38 
39   if (auto Err = graphifySectionsWithCustomParsers())
40     return std::move(Err);
41 
42   if (auto Err = addRelocations())
43     return std::move(Err);
44 
45   return std::move(G);
46 }
47 
48 MachOLinkGraphBuilder::MachOLinkGraphBuilder(const object::MachOObjectFile &Obj)
49     : Obj(Obj),
50       G(std::make_unique<LinkGraph>(std::string(Obj.getFileName()),
51                                     getPointerSize(Obj), getEndianness(Obj))) {}
52 
53 void MachOLinkGraphBuilder::addCustomSectionParser(
54     StringRef SectionName, SectionParserFunction Parser) {
55   assert(!CustomSectionParserFunctions.count(SectionName) &&
56          "Custom parser for this section already exists");
57   CustomSectionParserFunctions[SectionName] = std::move(Parser);
58 }
59 
60 Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) {
61   if ((Desc & MachO::N_WEAK_DEF) || (Desc & MachO::N_WEAK_REF))
62     return Linkage::Weak;
63   return Linkage::Strong;
64 }
65 
66 Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) {
67   if (Type & MachO::N_PEXT)
68     return Scope::Hidden;
69   if (Type & MachO::N_EXT) {
70     if (Name.startswith("l"))
71       return Scope::Hidden;
72     else
73       return Scope::Default;
74   }
75   return Scope::Local;
76 }
77 
78 bool MachOLinkGraphBuilder::isAltEntry(const NormalizedSymbol &NSym) {
79   return NSym.Desc & MachO::N_ALT_ENTRY;
80 }
81 
82 bool MachOLinkGraphBuilder::isDebugSection(const NormalizedSection &NSec) {
83   return (NSec.Flags & MachO::S_ATTR_DEBUG &&
84           strcmp(NSec.SegName, "__DWARF") == 0);
85 }
86 
87 unsigned
88 MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
89   return Obj.is64Bit() ? 8 : 4;
90 }
91 
92 support::endianness
93 MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
94   return Obj.isLittleEndian() ? support::little : support::big;
95 }
96 
97 Section &MachOLinkGraphBuilder::getCommonSection() {
98   if (!CommonSection) {
99     auto Prot = static_cast<sys::Memory::ProtectionFlags>(
100         sys::Memory::MF_READ | sys::Memory::MF_WRITE);
101     CommonSection = &G->createSection(CommonSectionName, Prot);
102   }
103   return *CommonSection;
104 }
105 
106 Error MachOLinkGraphBuilder::createNormalizedSections() {
107   // Build normalized sections. Verifies that section data is in-range (for
108   // sections with content) and that address ranges are non-overlapping.
109 
110   LLVM_DEBUG(dbgs() << "Creating normalized sections...\n");
111 
112   for (auto &SecRef : Obj.sections()) {
113     NormalizedSection NSec;
114     uint32_t DataOffset = 0;
115 
116     auto SecIndex = Obj.getSectionIndex(SecRef.getRawDataRefImpl());
117 
118     auto Name = SecRef.getName();
119     if (!Name)
120       return Name.takeError();
121 
122     if (Obj.is64Bit()) {
123       const MachO::section_64 &Sec64 =
124           Obj.getSection64(SecRef.getRawDataRefImpl());
125 
126       memcpy(&NSec.SectName, &Sec64.sectname, 16);
127       NSec.SectName[16] = '\0';
128       memcpy(&NSec.SegName, Sec64.segname, 16);
129       NSec.SegName[16] = '\0';
130 
131       NSec.Address = Sec64.addr;
132       NSec.Size = Sec64.size;
133       NSec.Alignment = 1ULL << Sec64.align;
134       NSec.Flags = Sec64.flags;
135       DataOffset = Sec64.offset;
136     } else {
137       const MachO::section &Sec32 = Obj.getSection(SecRef.getRawDataRefImpl());
138 
139       memcpy(&NSec.SectName, &Sec32.sectname, 16);
140       NSec.SectName[16] = '\0';
141       memcpy(&NSec.SegName, Sec32.segname, 16);
142       NSec.SegName[16] = '\0';
143 
144       NSec.Address = Sec32.addr;
145       NSec.Size = Sec32.size;
146       NSec.Alignment = 1ULL << Sec32.align;
147       NSec.Flags = Sec32.flags;
148       DataOffset = Sec32.offset;
149     }
150 
151     LLVM_DEBUG({
152       dbgs() << "  " << *Name << ": " << formatv("{0:x16}", NSec.Address)
153              << " -- " << formatv("{0:x16}", NSec.Address + NSec.Size)
154              << ", align: " << NSec.Alignment << ", index: " << SecIndex
155              << "\n";
156     });
157 
158     // Get the section data if any.
159     {
160       unsigned SectionType = NSec.Flags & MachO::SECTION_TYPE;
161       if (SectionType != MachO::S_ZEROFILL &&
162           SectionType != MachO::S_GB_ZEROFILL) {
163 
164         if (DataOffset + NSec.Size > Obj.getData().size())
165           return make_error<JITLinkError>(
166               "Section data extends past end of file");
167 
168         NSec.Data = Obj.getData().data() + DataOffset;
169       }
170     }
171 
172     // Get prot flags.
173     // FIXME: Make sure this test is correct (it's probably missing cases
174     // as-is).
175     sys::Memory::ProtectionFlags Prot;
176     if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS)
177       Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
178                                                        sys::Memory::MF_EXEC);
179     else
180       Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
181                                                        sys::Memory::MF_WRITE);
182 
183     if (!isDebugSection(NSec))
184       NSec.GraphSection = &G->createSection(*Name, Prot);
185     else
186       LLVM_DEBUG({
187         dbgs() << "    " << *Name
188                << " is a debug section: No graph section will be created.\n";
189       });
190 
191     IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
192   }
193 
194   std::vector<NormalizedSection *> Sections;
195   Sections.reserve(IndexToSection.size());
196   for (auto &KV : IndexToSection)
197     Sections.push_back(&KV.second);
198 
199   // If we didn't end up creating any sections then bail out. The code below
200   // assumes that we have at least one section.
201   if (Sections.empty())
202     return Error::success();
203 
204   llvm::sort(Sections,
205              [](const NormalizedSection *LHS, const NormalizedSection *RHS) {
206                assert(LHS && RHS && "Null section?");
207                if (LHS->Address != RHS->Address)
208                  return LHS->Address < RHS->Address;
209                return LHS->Size < RHS->Size;
210              });
211 
212   for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) {
213     auto &Cur = *Sections[I];
214     auto &Next = *Sections[I + 1];
215     if (Next.Address < Cur.Address + Cur.Size)
216       return make_error<JITLinkError>(
217           "Address range for section " +
218           formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Cur.SegName,
219                   Cur.SectName, Cur.Address, Cur.Address + Cur.Size) +
220           "overlaps section \"" + Next.SegName + "/" + Next.SectName + "\"" +
221           formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Next.SegName,
222                   Next.SectName, Next.Address, Next.Address + Next.Size));
223   }
224 
225   return Error::success();
226 }
227 
228 Error MachOLinkGraphBuilder::createNormalizedSymbols() {
229   LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n");
230 
231   for (auto &SymRef : Obj.symbols()) {
232 
233     unsigned SymbolIndex = Obj.getSymbolIndex(SymRef.getRawDataRefImpl());
234     uint64_t Value;
235     uint32_t NStrX;
236     uint8_t Type;
237     uint8_t Sect;
238     uint16_t Desc;
239 
240     if (Obj.is64Bit()) {
241       const MachO::nlist_64 &NL64 =
242           Obj.getSymbol64TableEntry(SymRef.getRawDataRefImpl());
243       Value = NL64.n_value;
244       NStrX = NL64.n_strx;
245       Type = NL64.n_type;
246       Sect = NL64.n_sect;
247       Desc = NL64.n_desc;
248     } else {
249       const MachO::nlist &NL32 =
250           Obj.getSymbolTableEntry(SymRef.getRawDataRefImpl());
251       Value = NL32.n_value;
252       NStrX = NL32.n_strx;
253       Type = NL32.n_type;
254       Sect = NL32.n_sect;
255       Desc = NL32.n_desc;
256     }
257 
258     // Skip stabs.
259     // FIXME: Are there other symbols we should be skipping?
260     if (Type & MachO::N_STAB)
261       continue;
262 
263     Optional<StringRef> Name;
264     if (NStrX) {
265       if (auto NameOrErr = SymRef.getName())
266         Name = *NameOrErr;
267       else
268         return NameOrErr.takeError();
269     }
270 
271     LLVM_DEBUG({
272       dbgs() << "  ";
273       if (!Name)
274         dbgs() << "<anonymous symbol>";
275       else
276         dbgs() << *Name;
277       dbgs() << ": value = " << formatv("{0:x16}", Value)
278              << ", type = " << formatv("{0:x2}", Type)
279              << ", desc = " << formatv("{0:x4}", Desc) << ", sect = ";
280       if (Sect)
281         dbgs() << static_cast<unsigned>(Sect - 1);
282       else
283         dbgs() << "none";
284       dbgs() << "\n";
285     });
286 
287     // If this symbol has a section, sanity check that the addresses line up.
288     if (Sect != 0) {
289       auto NSec = findSectionByIndex(Sect - 1);
290       if (!NSec)
291         return NSec.takeError();
292 
293       if (Value < NSec->Address || Value > NSec->Address + NSec->Size)
294         return make_error<JITLinkError>("Symbol address does not fall within "
295                                         "section");
296 
297       if (!NSec->GraphSection) {
298         LLVM_DEBUG({
299           dbgs() << "  Skipping: Symbol is in section " << NSec->SegName << "/"
300                  << NSec->SectName
301                  << " which has no associated graph section.\n";
302         });
303         continue;
304       }
305     }
306 
307     IndexToSymbol[SymbolIndex] =
308         &createNormalizedSymbol(*Name, Value, Type, Sect, Desc,
309                                 getLinkage(Desc), getScope(*Name, Type));
310   }
311 
312   return Error::success();
313 }
314 
315 void MachOLinkGraphBuilder::addSectionStartSymAndBlock(
316     Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size,
317     uint32_t Alignment, bool IsLive) {
318   Block &B =
319       Data ? G->createContentBlock(GraphSec, StringRef(Data, Size), Address,
320                                    Alignment, 0)
321            : G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0);
322   auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive);
323   assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) &&
324          "Anonymous block start symbol clashes with existing symbol address");
325   AddrToCanonicalSymbol[Sym.getAddress()] = &Sym;
326 }
327 
328 Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
329 
330   LLVM_DEBUG(dbgs() << "Creating graph symbols...\n");
331 
332   /// We only have 256 section indexes: Use a vector rather than a map.
333   std::vector<std::vector<NormalizedSymbol *>> SecIndexToSymbols;
334   SecIndexToSymbols.resize(256);
335 
336   // Create commons, externs, and absolutes, and partition all other symbols by
337   // section.
338   for (auto &KV : IndexToSymbol) {
339     auto &NSym = *KV.second;
340 
341     switch (NSym.Type & MachO::N_TYPE) {
342     case MachO::N_UNDF:
343       if (NSym.Value) {
344         if (!NSym.Name)
345           return make_error<JITLinkError>("Anonymous common symbol at index " +
346                                           Twine(KV.first));
347         NSym.GraphSymbol = &G->addCommonSymbol(
348             *NSym.Name, NSym.S, getCommonSection(), 0, NSym.Value,
349             1ull << MachO::GET_COMM_ALIGN(NSym.Desc),
350             NSym.Desc & MachO::N_NO_DEAD_STRIP);
351       } else {
352         if (!NSym.Name)
353           return make_error<JITLinkError>("Anonymous external symbol at "
354                                           "index " +
355                                           Twine(KV.first));
356         NSym.GraphSymbol = &G->addExternalSymbol(
357             *NSym.Name, 0,
358             NSym.Desc & MachO::N_WEAK_REF ? Linkage::Weak : Linkage::Strong);
359       }
360       break;
361     case MachO::N_ABS:
362       if (!NSym.Name)
363         return make_error<JITLinkError>("Anonymous absolute symbol at index " +
364                                         Twine(KV.first));
365       NSym.GraphSymbol = &G->addAbsoluteSymbol(
366           *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default,
367           NSym.Desc & MachO::N_NO_DEAD_STRIP);
368       break;
369     case MachO::N_SECT:
370       SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym);
371       break;
372     case MachO::N_PBUD:
373       return make_error<JITLinkError>(
374           "Unupported N_PBUD symbol " +
375           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
376           " at index " + Twine(KV.first));
377     case MachO::N_INDR:
378       return make_error<JITLinkError>(
379           "Unupported N_INDR symbol " +
380           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
381           " at index " + Twine(KV.first));
382     default:
383       return make_error<JITLinkError>(
384           "Unrecognized symbol type " + Twine(NSym.Type & MachO::N_TYPE) +
385           " for symbol " +
386           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
387           " at index " + Twine(KV.first));
388     }
389   }
390 
391   // Loop over sections performing regular graphification for those that
392   // don't have custom parsers.
393   for (auto &KV : IndexToSection) {
394     auto SecIndex = KV.first;
395     auto &NSec = KV.second;
396 
397     if (!NSec.GraphSection) {
398       LLVM_DEBUG({
399         dbgs() << "  " << NSec.SegName << "/" << NSec.SectName
400                << " has no graph section. Skipping.\n";
401       });
402       continue;
403     }
404 
405     // Skip sections with custom parsers.
406     if (CustomSectionParserFunctions.count(NSec.GraphSection->getName())) {
407       LLVM_DEBUG({
408         dbgs() << "  Skipping section " << NSec.GraphSection->getName()
409                << " as it has a custom parser.\n";
410       });
411       continue;
412     } else
413       LLVM_DEBUG({
414         dbgs() << "  Processing section " << NSec.GraphSection->getName()
415                << "...\n";
416       });
417 
418     bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP;
419     bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
420 
421     auto &SecNSymStack = SecIndexToSymbols[SecIndex];
422 
423     // If this section is non-empty but there are no symbols covering it then
424     // create one block and anonymous symbol to cover the entire section.
425     if (SecNSymStack.empty()) {
426       if (NSec.Size > 0) {
427         LLVM_DEBUG({
428           dbgs() << "    Section non-empty, but contains no symbols. "
429                     "Creating anonymous block to cover "
430                  << formatv("{0:x16}", NSec.Address) << " -- "
431                  << formatv("{0:x16}", NSec.Address + NSec.Size) << "\n";
432         });
433         addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
434                                    NSec.Size, NSec.Alignment,
435                                    SectionIsNoDeadStrip);
436       } else
437         LLVM_DEBUG({
438           dbgs() << "    Section empty and contains no symbols. Skipping.\n";
439         });
440       continue;
441     }
442 
443     // Sort the symbol stack in by address, alt-entry status, scope, and name.
444     // We sort in reverse order so that symbols will be visited in the right
445     // order when we pop off the stack below.
446     llvm::sort(SecNSymStack, [](const NormalizedSymbol *LHS,
447                                 const NormalizedSymbol *RHS) {
448       if (LHS->Value != RHS->Value)
449         return LHS->Value > RHS->Value;
450       if (isAltEntry(*LHS) != isAltEntry(*RHS))
451         return isAltEntry(*RHS);
452       if (LHS->S != RHS->S)
453         return static_cast<uint8_t>(LHS->S) < static_cast<uint8_t>(RHS->S);
454       return LHS->Name < RHS->Name;
455     });
456 
457     // The first symbol in a section can not be an alt-entry symbol.
458     if (!SecNSymStack.empty() && isAltEntry(*SecNSymStack.back()))
459       return make_error<JITLinkError>(
460           "First symbol in " + NSec.GraphSection->getName() + " is alt-entry");
461 
462     // If the section is non-empty but there is no symbol covering the start
463     // address then add an anonymous one.
464     if (SecNSymStack.back()->Value != NSec.Address) {
465       auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address;
466       LLVM_DEBUG({
467         dbgs() << "    Section start not covered by symbol. "
468                << "Creating anonymous block to cover [ "
469                << formatv("{0:x16}", NSec.Address) << " -- "
470                << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n";
471       });
472       addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
473                                  AnonBlockSize, NSec.Alignment,
474                                  SectionIsNoDeadStrip);
475     }
476 
477     // Visit section symbols in order by popping off the reverse-sorted stack,
478     // building blocks for each alt-entry chain and creating symbols as we go.
479     while (!SecNSymStack.empty()) {
480       SmallVector<NormalizedSymbol *, 8> BlockSyms;
481 
482       BlockSyms.push_back(SecNSymStack.back());
483       SecNSymStack.pop_back();
484       while (!SecNSymStack.empty() &&
485              (isAltEntry(*SecNSymStack.back()) ||
486               SecNSymStack.back()->Value == BlockSyms.back()->Value)) {
487         BlockSyms.push_back(SecNSymStack.back());
488         SecNSymStack.pop_back();
489       }
490 
491       // BlockNSyms now contains the block symbols in reverse canonical order.
492       JITTargetAddress BlockStart = BlockSyms.front()->Value;
493       JITTargetAddress BlockEnd = SecNSymStack.empty()
494                                       ? NSec.Address + NSec.Size
495                                       : SecNSymStack.back()->Value;
496       JITTargetAddress BlockOffset = BlockStart - NSec.Address;
497       JITTargetAddress BlockSize = BlockEnd - BlockStart;
498 
499       LLVM_DEBUG({
500         dbgs() << "    Creating block for " << formatv("{0:x16}", BlockStart)
501                << " -- " << formatv("{0:x16}", BlockEnd) << ": "
502                << NSec.GraphSection->getName() << " + "
503                << formatv("{0:x16}", BlockOffset) << " with "
504                << BlockSyms.size() << " symbol(s)...\n";
505       });
506 
507       Block &B =
508           NSec.Data
509               ? G->createContentBlock(
510                     *NSec.GraphSection,
511                     StringRef(NSec.Data + BlockOffset, BlockSize), BlockStart,
512                     NSec.Alignment, BlockStart % NSec.Alignment)
513               : G->createZeroFillBlock(*NSec.GraphSection, BlockSize,
514                                        BlockStart, NSec.Alignment,
515                                        BlockStart % NSec.Alignment);
516 
517       Optional<JITTargetAddress> LastCanonicalAddr;
518       JITTargetAddress SymEnd = BlockEnd;
519       while (!BlockSyms.empty()) {
520         auto &NSym = *BlockSyms.back();
521         BlockSyms.pop_back();
522 
523         bool SymLive =
524             (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip;
525 
526         LLVM_DEBUG({
527           dbgs() << "      " << formatv("{0:x16}", NSym.Value) << " -- "
528                  << formatv("{0:x16}", SymEnd) << ": ";
529           if (!NSym.Name)
530             dbgs() << "<anonymous symbol>";
531           else
532             dbgs() << NSym.Name;
533           if (SymLive)
534             dbgs() << " [no-dead-strip]";
535           if (LastCanonicalAddr == NSym.Value)
536             dbgs() << " [non-canonical]";
537           dbgs() << "\n";
538         });
539 
540         auto &Sym =
541             NSym.Name
542                 ? G->addDefinedSymbol(B, NSym.Value - BlockStart, *NSym.Name,
543                                       SymEnd - NSym.Value, NSym.L, NSym.S,
544                                       SectionIsText, SymLive)
545                 : G->addAnonymousSymbol(B, NSym.Value - BlockStart,
546                                         SymEnd - NSym.Value, SectionIsText,
547                                         SymLive);
548         NSym.GraphSymbol = &Sym;
549         if (LastCanonicalAddr != Sym.getAddress()) {
550           if (LastCanonicalAddr)
551             SymEnd = *LastCanonicalAddr;
552           LastCanonicalAddr = Sym.getAddress();
553           setCanonicalSymbol(Sym);
554         }
555       }
556     }
557   }
558 
559   return Error::success();
560 }
561 
562 Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() {
563   // Graphify special sections.
564   for (auto &KV : IndexToSection) {
565     auto &NSec = KV.second;
566 
567     // Skip non-graph sections.
568     if (!NSec.GraphSection)
569       continue;
570 
571     auto HI = CustomSectionParserFunctions.find(NSec.GraphSection->getName());
572     if (HI != CustomSectionParserFunctions.end()) {
573       auto &Parse = HI->second;
574       if (auto Err = Parse(NSec))
575         return Err;
576     }
577   }
578 
579   return Error::success();
580 }
581 
582 } // end namespace jitlink
583 } // end namespace llvm
584