1 //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 ///
10 /// \file Converts from in-memory normalized mach-o to in-memory Atoms.
11 ///
12 /// +------------+
13 /// | normalized |
14 /// +------------+
15 /// |
16 /// |
17 /// v
18 /// +-------+
19 /// | Atoms |
20 /// +-------+
21
22 #include "ArchHandler.h"
23 #include "Atoms.h"
24 #include "File.h"
25 #include "MachONormalizedFile.h"
26 #include "MachONormalizedFileBinaryUtils.h"
27 #include "lld/Common/LLVM.h"
28 #include "lld/Core/Error.h"
29 #include "llvm/BinaryFormat/Dwarf.h"
30 #include "llvm/BinaryFormat/MachO.h"
31 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
32 #include "llvm/Support/DataExtractor.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/Error.h"
35 #include "llvm/Support/Format.h"
36 #include "llvm/Support/LEB128.h"
37 #include "llvm/Support/raw_ostream.h"
38
39 using namespace llvm::MachO;
40 using namespace lld::mach_o::normalized;
41
42 #define DEBUG_TYPE "normalized-file-to-atoms"
43
44 namespace lld {
45 namespace mach_o {
46
47
48 namespace { // anonymous
49
50
// Shorthand for one row of sectsToAtomType below.  The last argument is a
// DefinedAtom ContentType enumerator written without its "DefinedAtom::"
// qualifier.
#define ENTRY(seg, sect, type, atomType) \
  {seg, sect, type, DefinedAtom::atomType }

/// One row of the table mapping a mach-o section (segment name, section name
/// and section type) to the ContentType used for atoms in that section.
struct MachORelocatableSectionToAtomType {
  StringRef                 segmentName;  // Empty string matches any segment.
  StringRef                 sectionName;  // Empty string matches any section.
  SectionType               sectionType;  // Compared against Section::type.
  DefinedAtom::ContentType  atomType;     // ContentType reported on a match.
};

/// Table scanned in order by atomTypeFromSection(); the first matching row
/// wins.  Rows with explicit names are listed before the wildcard ("", "")
/// row for the same section type.
const MachORelocatableSectionToAtomType sectsToAtomType[] = {
  ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
  // Same key as the row above, so a first-match scan never selects this row;
  // atomFromSymbol() switches typeCode to typeResolver based on the symbol's
  // N_SYMBOL_RESOLVER flag instead.
  ENTRY("__TEXT", "__text",           S_REGULAR,          typeResolver),
  ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
  ENTRY("",       "",                 S_CSTRING_LITERALS, typeCString),
  ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
  ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
  ENTRY("__TEXT", "__const_coal",     S_COALESCED,        typeConstant),
  ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
  ENTRY("__TEXT", "__eh_frame",       S_REGULAR,          typeCFI),
  ENTRY("__TEXT", "__literal4",       S_4BYTE_LITERALS,   typeLiteral4),
  ENTRY("__TEXT", "__literal8",       S_8BYTE_LITERALS,   typeLiteral8),
  ENTRY("__TEXT", "__literal16",      S_16BYTE_LITERALS,  typeLiteral16),
  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
  ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
  ENTRY("__DATA", "__datacoal_nt",    S_COALESCED,        typeData),
  ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
  ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
  ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
                                                          typeInitializerPtr),
  ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
                                                          typeTerminatorPtr),
  ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
                                                          typeGOT),
  ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
  ENTRY("",       "",                 S_NON_LAZY_SYMBOL_POINTERS,
                                                          typeGOT),
  ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
  ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
                                                          typeThunkTLV),
  ENTRY("__DATA", "__thread_data",    S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
  ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
                                                          typeTLVInitialZeroFill),
  ENTRY("__DATA", "__objc_imageinfo", S_REGULAR,          typeObjCImageInfo),
  ENTRY("__DATA", "__objc_catlist",   S_REGULAR,          typeObjC2CategoryList),
  ENTRY("",       "",                 S_INTERPOSING,      typeInterposingTuples),
  ENTRY("__LD",   "__compact_unwind", S_REGULAR,
                                                          typeCompactUnwindInfo),
  // Sentinel: atomTypeFromSection() stops scanning at the first row whose
  // atomType is typeUnknown.
  ENTRY("",       "",                 S_REGULAR,          typeUnknown)
};
#undef ENTRY
102
103
104 /// Figures out ContentType of a mach-o section.
atomTypeFromSection(const Section & section,bool & customSectionName)105 DefinedAtom::ContentType atomTypeFromSection(const Section §ion,
106 bool &customSectionName) {
107 // First look for match of name and type. Empty names in table are wildcards.
108 customSectionName = false;
109 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
110 p->atomType != DefinedAtom::typeUnknown; ++p) {
111 if (p->sectionType != section.type)
112 continue;
113 if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty())
114 continue;
115 if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty())
116 continue;
117 customSectionName = p->segmentName.empty() && p->sectionName.empty();
118 return p->atomType;
119 }
120 // Look for code denoted by section attributes
121 if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
122 return DefinedAtom::typeCode;
123
124 return DefinedAtom::typeUnknown;
125 }
126
/// Strategies processSection() uses to split a section's content into atoms.
enum AtomizeModel {
  atomizeAtSymbols,    // Delegated to processSymboledSection().
  atomizeFixedSize,    // Atoms of the section type's sizeMultiple bytes each.
  atomizePointerSize,  // Atoms the size of a pointer (8 or 4 bytes).
  atomizeUTF8,         // Zero terminated c-strings.
  atomizeUTF16,        // Zero terminated UTF16 strings.
  atomizeCFI,          // Dwarf unwind CFIs (FDE or CIE).
  atomizeCU,           // Compact unwind entries (32 or 20 bytes).
  atomizeCFString      // NS/CFString objects (32 or 16 bytes).
};
137
138 /// Returns info on how to atomize a section of the specified ContentType.
sectionParseInfo(DefinedAtom::ContentType atomType,unsigned int & sizeMultiple,DefinedAtom::Scope & scope,DefinedAtom::Merge & merge,AtomizeModel & atomizeModel)139 void sectionParseInfo(DefinedAtom::ContentType atomType,
140 unsigned int &sizeMultiple,
141 DefinedAtom::Scope &scope,
142 DefinedAtom::Merge &merge,
143 AtomizeModel &atomizeModel) {
144 struct ParseInfo {
145 DefinedAtom::ContentType atomType;
146 unsigned int sizeMultiple;
147 DefinedAtom::Scope scope;
148 DefinedAtom::Merge merge;
149 AtomizeModel atomizeModel;
150 };
151
152 #define ENTRY(type, size, scope, merge, model) \
153 {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
154
155 static const ParseInfo parseInfo[] = {
156 ENTRY(typeCode, 1, scopeGlobal, mergeNo,
157 atomizeAtSymbols),
158 ENTRY(typeData, 1, scopeGlobal, mergeNo,
159 atomizeAtSymbols),
160 ENTRY(typeConstData, 1, scopeGlobal, mergeNo,
161 atomizeAtSymbols),
162 ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo,
163 atomizeAtSymbols),
164 ENTRY(typeConstant, 1, scopeGlobal, mergeNo,
165 atomizeAtSymbols),
166 ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent,
167 atomizeUTF8),
168 ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent,
169 atomizeUTF16),
170 ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo,
171 atomizeCFI),
172 ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent,
173 atomizeFixedSize),
174 ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent,
175 atomizeFixedSize),
176 ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent,
177 atomizeFixedSize),
178 ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent,
179 atomizeCFString),
180 ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo,
181 atomizePointerSize),
182 ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo,
183 atomizePointerSize),
184 ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
185 atomizeCU),
186 ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent,
187 atomizePointerSize),
188 ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
189 atomizePointerSize),
190 ENTRY(typeUnknown, 1, scopeGlobal, mergeNo,
191 atomizeAtSymbols)
192 };
193 #undef ENTRY
194 const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
195 for (int i=0; i < tableLen; ++i) {
196 if (parseInfo[i].atomType == atomType) {
197 sizeMultiple = parseInfo[i].sizeMultiple;
198 scope = parseInfo[i].scope;
199 merge = parseInfo[i].merge;
200 atomizeModel = parseInfo[i].atomizeModel;
201 return;
202 }
203 }
204
205 // Unknown type is atomized by symbols.
206 sizeMultiple = 1;
207 scope = DefinedAtom::scopeGlobal;
208 merge = DefinedAtom::mergeNo;
209 atomizeModel = atomizeAtSymbols;
210 }
211
212
atomScope(uint8_t scope)213 Atom::Scope atomScope(uint8_t scope) {
214 switch (scope) {
215 case N_EXT:
216 return Atom::scopeGlobal;
217 case N_PEXT:
218 case N_PEXT | N_EXT:
219 return Atom::scopeLinkageUnit;
220 case 0:
221 return Atom::scopeTranslationUnit;
222 }
223 llvm_unreachable("unknown scope value!");
224 }
225
appendSymbolsInSection(const std::vector<lld::mach_o::normalized::Symbol> & inSymbols,uint32_t sectionIndex,SmallVector<const lld::mach_o::normalized::Symbol *,64> & outSyms)226 void appendSymbolsInSection(
227 const std::vector<lld::mach_o::normalized::Symbol> &inSymbols,
228 uint32_t sectionIndex,
229 SmallVector<const lld::mach_o::normalized::Symbol *, 64> &outSyms) {
230 for (const lld::mach_o::normalized::Symbol &sym : inSymbols) {
231 // Only look at definition symbols.
232 if ((sym.type & N_TYPE) != N_SECT)
233 continue;
234 if (sym.sect != sectionIndex)
235 continue;
236 outSyms.push_back(&sym);
237 }
238 }
239
atomFromSymbol(DefinedAtom::ContentType atomType,const Section & section,MachOFile & file,uint64_t symbolAddr,StringRef symbolName,uint16_t symbolDescFlags,Atom::Scope symbolScope,uint64_t nextSymbolAddr,bool scatterable,bool copyRefs)240 void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion,
241 MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
242 uint16_t symbolDescFlags, Atom::Scope symbolScope,
243 uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
244 // Mach-O symbol table does have size in it. Instead the size is the
245 // difference between this and the next symbol.
246 uint64_t size = nextSymbolAddr - symbolAddr;
247 uint64_t offset = symbolAddr - section.address;
248 bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
249 if (isZeroFillSection(section.type)) {
250 file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
251 noDeadStrip, copyRefs, §ion);
252 } else {
253 DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
254 ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo;
255 bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
256 if (atomType == DefinedAtom::typeUnknown) {
257 // Mach-O needs a segment and section name. Concatenate those two
258 // with a / separator (e.g. "seg/sect") to fit into the lld model
259 // of just a section name.
260 std::string segSectName = section.segmentName.str()
261 + "/" + section.sectionName.str();
262 file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
263 merge, thumb, noDeadStrip, offset,
264 size, segSectName, true, §ion);
265 } else {
266 if ((atomType == lld::DefinedAtom::typeCode) &&
267 (symbolDescFlags & N_SYMBOL_RESOLVER)) {
268 atomType = lld::DefinedAtom::typeResolver;
269 }
270 file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
271 offset, size, thumb, noDeadStrip, copyRefs, §ion);
272 }
273 }
274 }
275
processSymboledSection(DefinedAtom::ContentType atomType,const Section & section,const NormalizedFile & normalizedFile,MachOFile & file,bool scatterable,bool copyRefs)276 llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
277 const Section §ion,
278 const NormalizedFile &normalizedFile,
279 MachOFile &file, bool scatterable,
280 bool copyRefs) {
281 // Find section's index.
282 uint32_t sectIndex = 1;
283 for (auto § : normalizedFile.sections) {
284 if (§ == §ion)
285 break;
286 ++sectIndex;
287 }
288
289 // Find all symbols in this section.
290 SmallVector<const lld::mach_o::normalized::Symbol *, 64> symbols;
291 appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
292 appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols);
293
294 // Sort symbols.
295 std::sort(symbols.begin(), symbols.end(),
296 [](const lld::mach_o::normalized::Symbol *lhs,
297 const lld::mach_o::normalized::Symbol *rhs) -> bool {
298 if (lhs == rhs)
299 return false;
300 // First by address.
301 uint64_t lhsAddr = lhs->value;
302 uint64_t rhsAddr = rhs->value;
303 if (lhsAddr != rhsAddr)
304 return lhsAddr < rhsAddr;
305 // If same address, one is an alias so sort by scope.
306 Atom::Scope lScope = atomScope(lhs->scope);
307 Atom::Scope rScope = atomScope(rhs->scope);
308 if (lScope != rScope)
309 return lScope < rScope;
310 // If same address and scope, see if one might be better as
311 // the alias.
312 bool lPrivate = (lhs->name.front() == 'l');
313 bool rPrivate = (rhs->name.front() == 'l');
314 if (lPrivate != rPrivate)
315 return lPrivate;
316 // If same address and scope, sort by name.
317 return lhs->name < rhs->name;
318 });
319
320 // Debug logging of symbols.
321 // for (const Symbol *sym : symbols)
322 // llvm::errs() << " sym: "
323 // << llvm::format("0x%08llx ", (uint64_t)sym->value)
324 // << ", " << sym->name << "\n";
325
326 // If section has no symbols and no content, there are no atoms.
327 if (symbols.empty() && section.content.empty())
328 return llvm::Error::success();
329
330 if (symbols.empty()) {
331 // Section has no symbols, put all content in one anonymous atom.
332 atomFromSymbol(atomType, section, file, section.address, StringRef(),
333 0, Atom::scopeTranslationUnit,
334 section.address + section.content.size(),
335 scatterable, copyRefs);
336 }
337 else if (symbols.front()->value != section.address) {
338 // Section has anonymous content before first symbol.
339 atomFromSymbol(atomType, section, file, section.address, StringRef(),
340 0, Atom::scopeTranslationUnit, symbols.front()->value,
341 scatterable, copyRefs);
342 }
343
344 const lld::mach_o::normalized::Symbol *lastSym = nullptr;
345 for (const lld::mach_o::normalized::Symbol *sym : symbols) {
346 if (lastSym != nullptr) {
347 // Ignore any assembler added "ltmpNNN" symbol at start of section
348 // if there is another symbol at the start.
349 if ((lastSym->value != sym->value)
350 || lastSym->value != section.address
351 || !lastSym->name.startswith("ltmp")) {
352 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
353 lastSym->desc, atomScope(lastSym->scope), sym->value,
354 scatterable, copyRefs);
355 }
356 }
357 lastSym = sym;
358 }
359 if (lastSym != nullptr) {
360 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
361 lastSym->desc, atomScope(lastSym->scope),
362 section.address + section.content.size(),
363 scatterable, copyRefs);
364 }
365
366 // If object built without .subsections_via_symbols, add reference chain.
367 if (!scatterable) {
368 MachODefinedAtom *prevAtom = nullptr;
369 file.eachAtomInSection(section,
370 [&](MachODefinedAtom *atom, uint64_t offset)->void {
371 if (prevAtom)
372 prevAtom->addReference(Reference::KindNamespace::all,
373 Reference::KindArch::all,
374 Reference::kindLayoutAfter, 0, atom, 0);
375 prevAtom = atom;
376 });
377 }
378
379 return llvm::Error::success();
380 }
381
processSection(DefinedAtom::ContentType atomType,const Section & section,bool customSectionName,const NormalizedFile & normalizedFile,MachOFile & file,bool scatterable,bool copyRefs)382 llvm::Error processSection(DefinedAtom::ContentType atomType,
383 const Section §ion,
384 bool customSectionName,
385 const NormalizedFile &normalizedFile,
386 MachOFile &file, bool scatterable,
387 bool copyRefs) {
388 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
389 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
390
391 // Get info on how to atomize section.
392 unsigned int sizeMultiple;
393 DefinedAtom::Scope scope;
394 DefinedAtom::Merge merge;
395 AtomizeModel atomizeModel;
396 sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
397
398 // Validate section size.
399 if ((section.content.size() % sizeMultiple) != 0)
400 return llvm::make_error<GenericError>(Twine("Section ")
401 + section.segmentName
402 + "/" + section.sectionName
403 + " has size ("
404 + Twine(section.content.size())
405 + ") which is not a multiple of "
406 + Twine(sizeMultiple));
407
408 if (atomizeModel == atomizeAtSymbols) {
409 // Break section up into atoms each with a fixed size.
410 return processSymboledSection(atomType, section, normalizedFile, file,
411 scatterable, copyRefs);
412 } else {
413 unsigned int size;
414 for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
415 switch (atomizeModel) {
416 case atomizeFixedSize:
417 // Break section up into atoms each with a fixed size.
418 size = sizeMultiple;
419 break;
420 case atomizePointerSize:
421 // Break section up into atoms each the size of a pointer.
422 size = is64 ? 8 : 4;
423 break;
424 case atomizeUTF8:
425 // Break section up into zero terminated c-strings.
426 size = 0;
427 for (unsigned int i = offset; i < e; ++i) {
428 if (section.content[i] == 0) {
429 size = i + 1 - offset;
430 break;
431 }
432 }
433 break;
434 case atomizeUTF16:
435 // Break section up into zero terminated UTF16 strings.
436 size = 0;
437 for (unsigned int i = offset; i < e; i += 2) {
438 if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
439 size = i + 2 - offset;
440 break;
441 }
442 }
443 break;
444 case atomizeCFI:
445 // Break section up into dwarf unwind CFIs (FDE or CIE).
446 size = read32(§ion.content[offset], isBig) + 4;
447 if (offset+size > section.content.size()) {
448 return llvm::make_error<GenericError>(Twine("Section ")
449 + section.segmentName
450 + "/" + section.sectionName
451 + " is malformed. Size of CFI "
452 "starting at offset ("
453 + Twine(offset)
454 + ") is past end of section.");
455 }
456 break;
457 case atomizeCU:
458 // Break section up into compact unwind entries.
459 size = is64 ? 32 : 20;
460 break;
461 case atomizeCFString:
462 // Break section up into NS/CFString objects.
463 size = is64 ? 32 : 16;
464 break;
465 case atomizeAtSymbols:
466 break;
467 }
468 if (size == 0) {
469 return llvm::make_error<GenericError>(Twine("Section ")
470 + section.segmentName
471 + "/" + section.sectionName
472 + " is malformed. The last atom "
473 "is not zero terminated.");
474 }
475 if (customSectionName) {
476 // Mach-O needs a segment and section name. Concatenate those two
477 // with a / separator (e.g. "seg/sect") to fit into the lld model
478 // of just a section name.
479 std::string segSectName = section.segmentName.str()
480 + "/" + section.sectionName.str();
481 file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
482 merge, false, false, offset,
483 size, segSectName, true, §ion);
484 } else {
485 file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
486 false, false, copyRefs, §ion);
487 }
488 offset += size;
489 }
490 }
491 return llvm::Error::success();
492 }
493
findSectionCoveringAddress(const NormalizedFile & normalizedFile,uint64_t address)494 const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
495 uint64_t address) {
496 for (const Section &s : normalizedFile.sections) {
497 uint64_t sAddr = s.address;
498 if ((sAddr <= address) && (address < sAddr+s.content.size())) {
499 return &s;
500 }
501 }
502 return nullptr;
503 }
504
505 const MachODefinedAtom *
findAtomCoveringAddress(const NormalizedFile & normalizedFile,MachOFile & file,uint64_t addr,Reference::Addend & addend)506 findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
507 uint64_t addr, Reference::Addend &addend) {
508 const Section *sect = nullptr;
509 sect = findSectionCoveringAddress(normalizedFile, addr);
510 if (!sect)
511 return nullptr;
512
513 uint32_t offsetInTarget;
514 uint64_t offsetInSect = addr - sect->address;
515 auto atom =
516 file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
517 addend = offsetInTarget;
518 return atom;
519 }
520
521 // Walks all relocations for a section in a normalized .o file and
522 // creates corresponding lld::Reference objects.
convertRelocs(const Section & section,const NormalizedFile & normalizedFile,bool scatterable,MachOFile & file,ArchHandler & handler)523 llvm::Error convertRelocs(const Section §ion,
524 const NormalizedFile &normalizedFile,
525 bool scatterable,
526 MachOFile &file,
527 ArchHandler &handler) {
528 // Utility function for ArchHandler to find atom by its address.
529 auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
530 const lld::Atom **atom, Reference::Addend *addend)
531 -> llvm::Error {
532 if (sectIndex > normalizedFile.sections.size())
533 return llvm::make_error<GenericError>(Twine("out of range section "
534 "index (") + Twine(sectIndex) + ")");
535 const Section *sect = nullptr;
536 if (sectIndex == 0) {
537 sect = findSectionCoveringAddress(normalizedFile, addr);
538 if (!sect)
539 return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
540 + ") is not in any section"));
541 } else {
542 sect = &normalizedFile.sections[sectIndex-1];
543 }
544 uint32_t offsetInTarget;
545 uint64_t offsetInSect = addr - sect->address;
546 *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
547 *addend = offsetInTarget;
548 return llvm::Error::success();
549 };
550
551 // Utility function for ArchHandler to find atom by its symbol index.
552 auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
553 -> llvm::Error {
554 // Find symbol from index.
555 const lld::mach_o::normalized::Symbol *sym = nullptr;
556 uint32_t numStabs = normalizedFile.stabsSymbols.size();
557 uint32_t numLocal = normalizedFile.localSymbols.size();
558 uint32_t numGlobal = normalizedFile.globalSymbols.size();
559 uint32_t numUndef = normalizedFile.undefinedSymbols.size();
560 assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
561 if (symbolIndex < numStabs+numLocal) {
562 sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
563 } else if (symbolIndex < numStabs+numLocal+numGlobal) {
564 sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
565 } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
566 sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
567 numGlobal];
568 } else {
569 return llvm::make_error<GenericError>(Twine("symbol index (")
570 + Twine(symbolIndex) + ") out of range");
571 }
572
573 // Find atom from symbol.
574 if ((sym->type & N_TYPE) == N_SECT) {
575 if (sym->sect > normalizedFile.sections.size())
576 return llvm::make_error<GenericError>(Twine("symbol section index (")
577 + Twine(sym->sect) + ") out of range ");
578 const Section &symSection = normalizedFile.sections[sym->sect-1];
579 uint64_t targetOffsetInSect = sym->value - symSection.address;
580 MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
581 targetOffsetInSect);
582 if (target) {
583 *result = target;
584 return llvm::Error::success();
585 }
586 return llvm::make_error<GenericError>("no atom found for defined symbol");
587 } else if ((sym->type & N_TYPE) == N_UNDF) {
588 const lld::Atom *target = file.findUndefAtom(sym->name);
589 if (target) {
590 *result = target;
591 return llvm::Error::success();
592 }
593 return llvm::make_error<GenericError>("no undefined atom found for sym");
594 } else {
595 // Search undefs
596 return llvm::make_error<GenericError>("no atom found for symbol");
597 }
598 };
599
600 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
601 // Use old-school iterator so that paired relocations can be grouped.
602 for (auto it=section.relocations.begin(), e=section.relocations.end();
603 it != e; ++it) {
604 const Relocation &reloc = *it;
605 // Find atom this relocation is in.
606 if (reloc.offset > section.content.size())
607 return llvm::make_error<GenericError>(
608 Twine("r_address (") + Twine(reloc.offset)
609 + ") is larger than section size ("
610 + Twine(section.content.size()) + ")");
611 uint32_t offsetInAtom;
612 MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
613 reloc.offset,
614 &offsetInAtom);
615 assert(inAtom && "r_address in range, should have found atom");
616 uint64_t fixupAddress = section.address + reloc.offset;
617
618 const lld::Atom *target = nullptr;
619 Reference::Addend addend = 0;
620 Reference::KindValue kind;
621 if (handler.isPairedReloc(reloc)) {
622 // Handle paired relocations together.
623 const Relocation &reloc2 = *++it;
624 auto relocErr = handler.getPairReferenceInfo(
625 reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
626 atomByAddr, atomBySymbol, &kind, &target, &addend);
627 if (relocErr) {
628 return handleErrors(std::move(relocErr),
629 [&](std::unique_ptr<GenericError> GE) {
630 return llvm::make_error<GenericError>(
631 Twine("bad relocation (") + GE->getMessage()
632 + ") in section "
633 + section.segmentName + "/" + section.sectionName
634 + " (r1_address=" + Twine::utohexstr(reloc.offset)
635 + ", r1_type=" + Twine(reloc.type)
636 + ", r1_extern=" + Twine(reloc.isExtern)
637 + ", r1_length=" + Twine((int)reloc.length)
638 + ", r1_pcrel=" + Twine(reloc.pcRel)
639 + (!reloc.scattered ? (Twine(", r1_symbolnum=")
640 + Twine(reloc.symbol))
641 : (Twine(", r1_scattered=1, r1_value=")
642 + Twine(reloc.value)))
643 + ")"
644 + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
645 + ", r2_type=" + Twine(reloc2.type)
646 + ", r2_extern=" + Twine(reloc2.isExtern)
647 + ", r2_length=" + Twine((int)reloc2.length)
648 + ", r2_pcrel=" + Twine(reloc2.pcRel)
649 + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
650 + Twine(reloc2.symbol))
651 : (Twine(", r2_scattered=1, r2_value=")
652 + Twine(reloc2.value)))
653 + ")" );
654 });
655 }
656 }
657 else {
658 // Use ArchHandler to convert relocation record into information
659 // needed to instantiate an lld::Reference object.
660 auto relocErr = handler.getReferenceInfo(
661 reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
662 atomBySymbol, &kind, &target, &addend);
663 if (relocErr) {
664 return handleErrors(std::move(relocErr),
665 [&](std::unique_ptr<GenericError> GE) {
666 return llvm::make_error<GenericError>(
667 Twine("bad relocation (") + GE->getMessage()
668 + ") in section "
669 + section.segmentName + "/" + section.sectionName
670 + " (r_address=" + Twine::utohexstr(reloc.offset)
671 + ", r_type=" + Twine(reloc.type)
672 + ", r_extern=" + Twine(reloc.isExtern)
673 + ", r_length=" + Twine((int)reloc.length)
674 + ", r_pcrel=" + Twine(reloc.pcRel)
675 + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
676 : (Twine(", r_scattered=1, r_value=")
677 + Twine(reloc.value)))
678 + ")" );
679 });
680 }
681 }
682 // Instantiate an lld::Reference object and add to its atom.
683 inAtom->addReference(Reference::KindNamespace::mach_o,
684 handler.kindArch(),
685 kind, offsetInAtom, target, addend);
686 }
687
688 return llvm::Error::success();
689 }
690
isDebugInfoSection(const Section & section)691 bool isDebugInfoSection(const Section §ion) {
692 if ((section.attributes & S_ATTR_DEBUG) == 0)
693 return false;
694 return section.segmentName.equals("__DWARF");
695 }
696
findDefinedAtomByName(MachOFile & file,Twine name)697 static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
698 std::string strName = name.str();
699 for (auto *atom : file.defined())
700 if (atom->name() == strName)
701 return atom;
702 return nullptr;
703 }
704
copyDebugString(StringRef str,BumpPtrAllocator & alloc)705 static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
706 char *strCopy = alloc.Allocate<char>(str.size() + 1);
707 memcpy(strCopy, str.data(), str.size());
708 strCopy[str.size()] = '\0';
709 return strCopy;
710 }
711
parseStabs(MachOFile & file,const NormalizedFile & normalizedFile,bool copyRefs)712 llvm::Error parseStabs(MachOFile &file,
713 const NormalizedFile &normalizedFile,
714 bool copyRefs) {
715
716 if (normalizedFile.stabsSymbols.empty())
717 return llvm::Error::success();
718
719 // FIXME: Kill this off when we can move to sane yaml parsing.
720 std::unique_ptr<BumpPtrAllocator> allocator;
721 if (copyRefs)
722 allocator = std::make_unique<BumpPtrAllocator>();
723
724 enum { start, inBeginEnd } state = start;
725
726 const Atom *currentAtom = nullptr;
727 uint64_t currentAtomAddress = 0;
728 StabsDebugInfo::StabsList stabsList;
729 for (const auto &stabSym : normalizedFile.stabsSymbols) {
730 Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
731 stabSym.value, stabSym.name);
732 switch (state) {
733 case start:
734 switch (static_cast<StabType>(stabSym.type)) {
735 case N_BNSYM:
736 state = inBeginEnd;
737 currentAtomAddress = stabSym.value;
738 Reference::Addend addend;
739 currentAtom = findAtomCoveringAddress(normalizedFile, file,
740 currentAtomAddress, addend);
741 if (addend != 0)
742 return llvm::make_error<GenericError>(
743 "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
744 file.path());
745 if (currentAtom)
746 stab.atom = currentAtom;
747 else {
748 // FIXME: ld64 just issues a warning here - should we match that?
749 return llvm::make_error<GenericError>(
750 "can't find atom for stabs BNSYM at " +
751 Twine::utohexstr(stabSym.value) + " in " + file.path());
752 }
753 break;
754 case N_SO:
755 case N_OSO:
756 // Not associated with an atom, just copy.
757 if (copyRefs)
758 stab.str = copyDebugString(stabSym.name, *allocator);
759 else
760 stab.str = stabSym.name;
761 break;
762 case N_GSYM: {
763 auto colonIdx = stabSym.name.find(':');
764 if (colonIdx != StringRef::npos) {
765 StringRef name = stabSym.name.substr(0, colonIdx);
766 currentAtom = findDefinedAtomByName(file, "_" + name);
767 stab.atom = currentAtom;
768 if (copyRefs)
769 stab.str = copyDebugString(stabSym.name, *allocator);
770 else
771 stab.str = stabSym.name;
772 } else {
773 currentAtom = findDefinedAtomByName(file, stabSym.name);
774 stab.atom = currentAtom;
775 if (copyRefs)
776 stab.str = copyDebugString(stabSym.name, *allocator);
777 else
778 stab.str = stabSym.name;
779 }
780 if (stab.atom == nullptr)
781 return llvm::make_error<GenericError>(
782 "can't find atom for N_GSYM stabs" + stabSym.name +
783 " in " + file.path());
784 break;
785 }
786 case N_FUN:
787 return llvm::make_error<GenericError>(
788 "old-style N_FUN stab '" + stabSym.name + "' unsupported");
789 default:
790 return llvm::make_error<GenericError>(
791 "unrecognized stab symbol '" + stabSym.name + "'");
792 }
793 break;
794 case inBeginEnd:
795 stab.atom = currentAtom;
796 switch (static_cast<StabType>(stabSym.type)) {
797 case N_ENSYM:
798 state = start;
799 currentAtom = nullptr;
800 break;
801 case N_FUN:
802 // Just copy the string.
803 if (copyRefs)
804 stab.str = copyDebugString(stabSym.name, *allocator);
805 else
806 stab.str = stabSym.name;
807 break;
808 default:
809 return llvm::make_error<GenericError>(
810 "unrecognized stab symbol '" + stabSym.name + "'");
811 }
812 }
813 llvm::dbgs() << "Adding to stabsList: " << stab << "\n";
814 stabsList.push_back(stab);
815 }
816
817 file.setDebugInfo(std::make_unique<StabsDebugInfo>(std::move(stabsList)));
818
819 // FIXME: Kill this off when we fix YAML memory ownership.
820 file.debugInfo()->setAllocator(std::move(allocator));
821
822 return llvm::Error::success();
823 }
824
825 static llvm::DataExtractor
dataExtractorFromSection(const NormalizedFile & normalizedFile,const Section & S)826 dataExtractorFromSection(const NormalizedFile &normalizedFile,
827 const Section &S) {
828 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
829 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
830 StringRef SecData(reinterpret_cast<const char*>(S.content.data()),
831 S.content.size());
832 return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4);
833 }
834
835 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
836 // inspection" code if possible.
getCUAbbrevOffset(llvm::DataExtractor abbrevData,uint64_t abbrCode)837 static uint64_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
838 uint64_t abbrCode) {
839 uint64_t offset = 0;
840 while (abbrevData.getULEB128(&offset) != abbrCode) {
841 // Tag
842 abbrevData.getULEB128(&offset);
843 // DW_CHILDREN
844 abbrevData.getU8(&offset);
845 // Attributes
846 while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
847 ;
848 }
849 return offset;
850 }
851
852 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
853 // inspection" code if possible.
854 static Expected<const char *>
getIndexedString(const NormalizedFile & normalizedFile,llvm::dwarf::Form form,llvm::DataExtractor infoData,uint64_t & infoOffset,const Section & stringsSection)855 getIndexedString(const NormalizedFile &normalizedFile,
856 llvm::dwarf::Form form, llvm::DataExtractor infoData,
857 uint64_t &infoOffset, const Section &stringsSection) {
858 if (form == llvm::dwarf::DW_FORM_string)
859 return infoData.getCStr(&infoOffset);
860 if (form != llvm::dwarf::DW_FORM_strp)
861 return llvm::make_error<GenericError>(
862 "string field encoded without DW_FORM_strp");
863 uint64_t stringOffset = infoData.getU32(&infoOffset);
864 llvm::DataExtractor stringsData =
865 dataExtractorFromSection(normalizedFile, stringsSection);
866 return stringsData.getCStr(&stringOffset);
867 }
868
869 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
870 // inspection" code if possible.
871 static llvm::Expected<TranslationUnitSource>
readCompUnit(const NormalizedFile & normalizedFile,const Section & info,const Section & abbrev,const Section & strings,StringRef path)872 readCompUnit(const NormalizedFile &normalizedFile,
873 const Section &info,
874 const Section &abbrev,
875 const Section &strings,
876 StringRef path) {
877 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
878 // inspection" code if possible.
879 uint64_t offset = 0;
880 llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
881 auto infoData = dataExtractorFromSection(normalizedFile, info);
882 uint32_t length = infoData.getU32(&offset);
883 if (length == llvm::dwarf::DW_LENGTH_DWARF64) {
884 Format = llvm::dwarf::DwarfFormat::DWARF64;
885 infoData.getU64(&offset);
886 }
887 else if (length >= llvm::dwarf::DW_LENGTH_lo_reserved)
888 return llvm::make_error<GenericError>("Malformed DWARF in " + path);
889
890 uint16_t version = infoData.getU16(&offset);
891
892 if (version < 2 || version > 4)
893 return llvm::make_error<GenericError>("Unsupported DWARF version in " +
894 path);
895
896 infoData.getU32(&offset); // Abbrev offset (should be zero)
897 uint8_t addrSize = infoData.getU8(&offset);
898
899 uint32_t abbrCode = infoData.getULEB128(&offset);
900 auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
901 uint64_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
902 uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
903 if (tag != llvm::dwarf::DW_TAG_compile_unit)
904 return llvm::make_error<GenericError>("top level DIE is not a compile unit");
905 // DW_CHILDREN
906 abbrevData.getU8(&abbrevOffset);
907 uint32_t name;
908 llvm::dwarf::Form form;
909 llvm::dwarf::FormParams formParams = {version, addrSize, Format};
910 TranslationUnitSource tu;
911 while ((name = abbrevData.getULEB128(&abbrevOffset)) |
912 (form = static_cast<llvm::dwarf::Form>(
913 abbrevData.getULEB128(&abbrevOffset))) &&
914 (name != 0 || form != 0)) {
915 switch (name) {
916 case llvm::dwarf::DW_AT_name: {
917 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
918 strings))
919 tu.name = *eName;
920 else
921 return eName.takeError();
922 break;
923 }
924 case llvm::dwarf::DW_AT_comp_dir: {
925 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
926 strings))
927 tu.path = *eName;
928 else
929 return eName.takeError();
930 break;
931 }
932 default:
933 llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
934 }
935 }
936 return tu;
937 }
938
parseDebugInfo(MachOFile & file,const NormalizedFile & normalizedFile,bool copyRefs)939 llvm::Error parseDebugInfo(MachOFile &file,
940 const NormalizedFile &normalizedFile, bool copyRefs) {
941
942 // Find the interesting debug info sections.
943 const Section *debugInfo = nullptr;
944 const Section *debugAbbrev = nullptr;
945 const Section *debugStrings = nullptr;
946
947 for (auto &s : normalizedFile.sections) {
948 if (s.segmentName == "__DWARF") {
949 if (s.sectionName == "__debug_info")
950 debugInfo = &s;
951 else if (s.sectionName == "__debug_abbrev")
952 debugAbbrev = &s;
953 else if (s.sectionName == "__debug_str")
954 debugStrings = &s;
955 }
956 }
957
958 if (!debugInfo)
959 return parseStabs(file, normalizedFile, copyRefs);
960
961 if (debugInfo->content.size() == 0)
962 return llvm::Error::success();
963
964 if (debugInfo->content.size() < 12)
965 return llvm::make_error<GenericError>("Malformed __debug_info section in " +
966 file.path() + ": too small");
967
968 if (!debugAbbrev)
969 return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " +
970 file.path());
971
972 if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
973 *debugStrings, file.path())) {
974 // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML
975 // memory ownership.
976 std::unique_ptr<BumpPtrAllocator> allocator;
977 if (copyRefs) {
978 allocator = std::make_unique<BumpPtrAllocator>();
979 tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
980 tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
981 }
982 file.setDebugInfo(std::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
983 if (copyRefs)
984 file.debugInfo()->setAllocator(std::move(allocator));
985 } else
986 return tuOrErr.takeError();
987
988 return llvm::Error::success();
989 }
990
readSPtr(bool is64,bool isBig,const uint8_t * addr)991 static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
992 if (is64)
993 return read64(addr, isBig);
994
995 int32_t res = read32(addr, isBig);
996 return res;
997 }
998
999 /// --- Augmentation String Processing ---
1000
/// Facts gathered from a CIE's augmentation string. Offsets are relative to
/// the start of the augmentation data; ~0U marks a value that was not found.
struct CIEInfo {
  /// The augmentation string started with 'z'.
  bool _augmentationDataPresent{false};
  /// The augmentation string contained "eh".
  bool _mayHaveEH{false};
  /// Offset recorded for the 'L' entry.
  uint32_t _offsetOfLSDA{~0U};
  /// Offset recorded for the 'P' entry.
  uint32_t _offsetOfPersonality{~0U};
  /// Offset recorded for the 'R' entry.
  uint32_t _offsetOfFDEPointerEncoding{~0U};
  /// Total augmentation data size implied by the augmentation string.
  uint32_t _augmentationDataLength{~0U};
};
1009
1010 typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
1011
processAugmentationString(const uint8_t * augStr,CIEInfo & cieInfo,unsigned & len)1012 static llvm::Error processAugmentationString(const uint8_t *augStr,
1013 CIEInfo &cieInfo,
1014 unsigned &len) {
1015
1016 if (augStr[0] == '\0') {
1017 len = 1;
1018 return llvm::Error::success();
1019 }
1020
1021 if (augStr[0] != 'z')
1022 return llvm::make_error<GenericError>("expected 'z' at start of "
1023 "augmentation string");
1024
1025 cieInfo._augmentationDataPresent = true;
1026 uint64_t idx = 1;
1027
1028 uint32_t offsetInAugmentationData = 0;
1029 while (augStr[idx] != '\0') {
1030 if (augStr[idx] == 'L') {
1031 cieInfo._offsetOfLSDA = offsetInAugmentationData;
1032 // This adds a single byte to the augmentation data.
1033 ++offsetInAugmentationData;
1034 ++idx;
1035 continue;
1036 }
1037 if (augStr[idx] == 'P') {
1038 cieInfo._offsetOfPersonality = offsetInAugmentationData;
1039 // This adds a single byte to the augmentation data for the encoding,
1040 // then a number of bytes for the pointer data.
1041 // FIXME: We are assuming 4 is correct here for the pointer size as we
1042 // always currently use delta32ToGOT.
1043 offsetInAugmentationData += 5;
1044 ++idx;
1045 continue;
1046 }
1047 if (augStr[idx] == 'R') {
1048 cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData;
1049 // This adds a single byte to the augmentation data.
1050 ++offsetInAugmentationData;
1051 ++idx;
1052 continue;
1053 }
1054 if (augStr[idx] == 'e') {
1055 if (augStr[idx + 1] != 'h')
1056 return llvm::make_error<GenericError>("expected 'eh' in "
1057 "augmentation string");
1058 cieInfo._mayHaveEH = true;
1059 idx += 2;
1060 continue;
1061 }
1062 ++idx;
1063 }
1064
1065 cieInfo._augmentationDataLength = offsetInAugmentationData;
1066
1067 len = idx + 1;
1068 return llvm::Error::success();
1069 }
1070
processCIE(const NormalizedFile & normalizedFile,MachOFile & file,mach_o::ArchHandler & handler,const Section * ehFrameSection,MachODefinedAtom * atom,uint64_t offset,CIEInfoMap & cieInfos)1071 static llvm::Error processCIE(const NormalizedFile &normalizedFile,
1072 MachOFile &file,
1073 mach_o::ArchHandler &handler,
1074 const Section *ehFrameSection,
1075 MachODefinedAtom *atom,
1076 uint64_t offset,
1077 CIEInfoMap &cieInfos) {
1078 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1079 const uint8_t *frameData = atom->rawContent().data();
1080
1081 CIEInfo cieInfo;
1082
1083 uint32_t size = read32(frameData, isBig);
1084 uint64_t cieIDField = size == 0xffffffffU
1085 ? sizeof(uint32_t) + sizeof(uint64_t)
1086 : sizeof(uint32_t);
1087 uint64_t versionField = cieIDField + sizeof(uint32_t);
1088 uint64_t augmentationStringField = versionField + sizeof(uint8_t);
1089
1090 unsigned augmentationStringLength = 0;
1091 if (auto err = processAugmentationString(frameData + augmentationStringField,
1092 cieInfo, augmentationStringLength))
1093 return err;
1094
1095 if (cieInfo._offsetOfPersonality != ~0U) {
1096 // If we have augmentation data for the personality function, then we may
1097 // need to implicitly generate its relocation.
1098
1099 // Parse the EH Data field which is pointer sized.
1100 uint64_t EHDataField = augmentationStringField + augmentationStringLength;
1101 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1102 unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);
1103
1104 // Parse Code Align Factor which is a ULEB128.
1105 uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
1106 unsigned lengthFieldSize = 0;
1107 llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
1108
1109 // Parse Data Align Factor which is a SLEB128.
1110 uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
1111 llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
1112
1113 // Parse Return Address Register which is a byte.
1114 uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
1115
1116 // Parse the augmentation length which is a ULEB128.
1117 uint64_t AugmentationLengthField = ReturnAddressField + 1;
1118 uint64_t AugmentationLength =
1119 llvm::decodeULEB128(frameData + AugmentationLengthField,
1120 &lengthFieldSize);
1121
1122 if (AugmentationLength != cieInfo._augmentationDataLength)
1123 return llvm::make_error<GenericError>("CIE augmentation data length "
1124 "mismatch");
1125
1126 // Get the start address of the augmentation data.
1127 uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
1128
1129 // Parse the personality function from the augmentation data.
1130 uint64_t PersonalityField =
1131 AugmentationDataField + cieInfo._offsetOfPersonality;
1132
1133 // Parse the personality encoding.
1134 // FIXME: Verify that this is a 32-bit pcrel offset.
1135 uint64_t PersonalityFunctionField = PersonalityField + 1;
1136
1137 if (atom->begin() != atom->end()) {
1138 // If we have an explicit relocation, then make sure it matches this
1139 // offset as this is where we'd expect it to be applied to.
1140 DefinedAtom::reference_iterator CurrentRef = atom->begin();
1141 if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
1142 return llvm::make_error<GenericError>("CIE personality reloc at "
1143 "wrong offset");
1144
1145 if (++CurrentRef != atom->end())
1146 return llvm::make_error<GenericError>("CIE contains too many relocs");
1147 } else {
1148 // Implicitly generate the personality function reloc. It's assumed to
1149 // be a delta32 offset to a GOT entry.
1150 // FIXME: Parse the encoding and check this.
1151 int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
1152 uint64_t funcAddress = ehFrameSection->address + offset +
1153 PersonalityFunctionField;
1154 funcAddress += funcDelta;
1155
1156 const MachODefinedAtom *func = nullptr;
1157 Reference::Addend addend;
1158 func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
1159 addend);
1160 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1161 handler.unwindRefToPersonalityFunctionKind(),
1162 PersonalityFunctionField, func, addend);
1163 }
1164 } else if (atom->begin() != atom->end()) {
1165 // Otherwise, we expect there to be no relocations in this atom as the only
1166 // relocation would have been to the personality function.
1167 return llvm::make_error<GenericError>("unexpected relocation in CIE");
1168 }
1169
1170
1171 cieInfos[atom] = std::move(cieInfo);
1172
1173 return llvm::Error::success();
1174 }
1175
processFDE(const NormalizedFile & normalizedFile,MachOFile & file,mach_o::ArchHandler & handler,const Section * ehFrameSection,MachODefinedAtom * atom,uint64_t offset,const CIEInfoMap & cieInfos)1176 static llvm::Error processFDE(const NormalizedFile &normalizedFile,
1177 MachOFile &file,
1178 mach_o::ArchHandler &handler,
1179 const Section *ehFrameSection,
1180 MachODefinedAtom *atom,
1181 uint64_t offset,
1182 const CIEInfoMap &cieInfos) {
1183
1184 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1185 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1186
1187 // Compiler wasn't lazy and actually told us what it meant.
1188 // Unfortunately, the compiler may not have generated references for all of
1189 // [cie, func, lsda] and so we still need to parse the FDE and add references
1190 // for any the compiler didn't generate.
1191 if (atom->begin() != atom->end())
1192 atom->sortReferences();
1193
1194 DefinedAtom::reference_iterator CurrentRef = atom->begin();
1195
1196 // This helper returns the reference (if one exists) at the offset we are
1197 // currently processing. It automatically increments the ref iterator if we
1198 // do return a ref, and throws an error if we pass over a ref without
1199 // comsuming it.
1200 auto currentRefGetter = [&CurrentRef,
1201 &atom](uint64_t Offset)->const Reference* {
1202 // If there are no more refs found, then we are done.
1203 if (CurrentRef == atom->end())
1204 return nullptr;
1205
1206 const Reference *Ref = *CurrentRef;
1207
1208 // If we haven't reached the offset for this reference, then return that
1209 // we don't yet have a reference to process.
1210 if (Offset < Ref->offsetInAtom())
1211 return nullptr;
1212
1213 // If the offset is equal, then we want to process this ref.
1214 if (Offset == Ref->offsetInAtom()) {
1215 ++CurrentRef;
1216 return Ref;
1217 }
1218
1219 // The current ref is at an offset which is earlier than the current
1220 // offset, then we failed to consume it when we should have. In this case
1221 // throw an error.
1222 llvm::report_fatal_error("Skipped reference when processing FDE");
1223 };
1224
1225 // Helper to either get the reference at this current location, and verify
1226 // that it is of the expected type, or add a reference of that type.
1227 // Returns the reference target.
1228 auto verifyOrAddReference = [&](uint64_t targetAddress,
1229 Reference::KindValue refKind,
1230 uint64_t refAddress,
1231 bool allowsAddend)->const Atom* {
1232 if (auto *ref = currentRefGetter(refAddress)) {
1233 // The compiler already emitted a relocation for the CIE ref. This should
1234 // have been converted to the correct type of reference in
1235 // get[Pair]ReferenceInfo().
1236 assert(ref->kindValue() == refKind &&
1237 "Incorrect EHFrame reference kind");
1238 return ref->target();
1239 }
1240 Reference::Addend addend;
1241 auto *target = findAtomCoveringAddress(normalizedFile, file,
1242 targetAddress, addend);
1243 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1244 refKind, refAddress, target, addend);
1245
1246 if (!allowsAddend)
1247 assert(!addend && "EHFrame reference cannot have addend");
1248 return target;
1249 };
1250
1251 const uint8_t *startFrameData = atom->rawContent().data();
1252 const uint8_t *frameData = startFrameData;
1253
1254 uint32_t size = read32(frameData, isBig);
1255 uint64_t cieFieldInFDE = size == 0xffffffffU
1256 ? sizeof(uint32_t) + sizeof(uint64_t)
1257 : sizeof(uint32_t);
1258
1259 // Linker needs to fixup a reference from the FDE to its parent CIE (a
1260 // 32-bit byte offset backwards in the __eh_frame section).
1261 uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
1262 uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
1263 cieAddress -= cieDelta;
1264
1265 auto *cieRefTarget = verifyOrAddReference(cieAddress,
1266 handler.unwindRefToCIEKind(),
1267 cieFieldInFDE, false);
1268 const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
1269 assert(cie && cie->contentType() == DefinedAtom::typeCFI &&
1270 "FDE's CIE field does not point at the start of a CIE.");
1271
1272 const CIEInfo &cieInfo = cieInfos.find(cie)->second;
1273
1274 // Linker needs to fixup reference from the FDE to the function it's
1275 // describing. FIXME: there are actually different ways to do this, and the
1276 // particular method used is specified in the CIE's augmentation fields
1277 // (hopefully)
1278 uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
1279
1280 int64_t functionFromFDE = readSPtr(is64, isBig,
1281 frameData + rangeFieldInFDE);
1282 uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
1283 rangeStart += functionFromFDE;
1284
1285 verifyOrAddReference(rangeStart,
1286 handler.unwindRefToFunctionKind(),
1287 rangeFieldInFDE, true);
1288
1289 // Handle the augmentation data if there is any.
1290 if (cieInfo._augmentationDataPresent) {
1291 // First process the augmentation data length field.
1292 uint64_t augmentationDataLengthFieldInFDE =
1293 rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
1294 unsigned lengthFieldSize = 0;
1295 uint64_t augmentationDataLength =
1296 llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
1297 &lengthFieldSize);
1298
1299 if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {
1300
1301 // Look at the augmentation data field.
1302 uint64_t augmentationDataFieldInFDE =
1303 augmentationDataLengthFieldInFDE + lengthFieldSize;
1304
1305 int64_t lsdaFromFDE = readSPtr(is64, isBig,
1306 frameData + augmentationDataFieldInFDE);
1307 uint64_t lsdaStart =
1308 ehFrameSection->address + offset + augmentationDataFieldInFDE +
1309 lsdaFromFDE;
1310
1311 verifyOrAddReference(lsdaStart,
1312 handler.unwindRefToFunctionKind(),
1313 augmentationDataFieldInFDE, true);
1314 }
1315 }
1316
1317 return llvm::Error::success();
1318 }
1319
addEHFrameReferences(const NormalizedFile & normalizedFile,MachOFile & file,mach_o::ArchHandler & handler)1320 llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
1321 MachOFile &file,
1322 mach_o::ArchHandler &handler) {
1323
1324 const Section *ehFrameSection = nullptr;
1325 for (auto §ion : normalizedFile.sections)
1326 if (section.segmentName == "__TEXT" &&
1327 section.sectionName == "__eh_frame") {
1328 ehFrameSection = §ion;
1329 break;
1330 }
1331
1332 // No __eh_frame so nothing to do.
1333 if (!ehFrameSection)
1334 return llvm::Error::success();
1335
1336 llvm::Error ehFrameErr = llvm::Error::success();
1337 CIEInfoMap cieInfos;
1338
1339 file.eachAtomInSection(*ehFrameSection,
1340 [&](MachODefinedAtom *atom, uint64_t offset) -> void {
1341 assert(atom->contentType() == DefinedAtom::typeCFI);
1342
1343 // Bail out if we've encountered an error.
1344 if (ehFrameErr)
1345 return;
1346
1347 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1348 if (ArchHandler::isDwarfCIE(isBig, atom))
1349 ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
1350 atom, offset, cieInfos);
1351 else
1352 ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
1353 atom, offset, cieInfos);
1354 });
1355
1356 return ehFrameErr;
1357 }
1358
parseObjCImageInfo(const Section & sect,const NormalizedFile & normalizedFile,MachOFile & file)1359 llvm::Error parseObjCImageInfo(const Section §,
1360 const NormalizedFile &normalizedFile,
1361 MachOFile &file) {
1362
1363 // struct objc_image_info {
1364 // uint32_t version; // initially 0
1365 // uint32_t flags;
1366 // };
1367
1368 ArrayRef<uint8_t> content = sect.content;
1369 if (content.size() != 8)
1370 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1371 sect.sectionName +
1372 " in file " + file.path() +
1373 " should be 8 bytes in size");
1374
1375 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1376 uint32_t version = read32(content.data(), isBig);
1377 if (version)
1378 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1379 sect.sectionName +
1380 " in file " + file.path() +
1381 " should have version=0");
1382
1383 uint32_t flags = read32(content.data() + 4, isBig);
1384 if (flags & (MachOLinkingContext::objc_supports_gc |
1385 MachOLinkingContext::objc_gc_only))
1386 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1387 sect.sectionName +
1388 " in file " + file.path() +
1389 " uses GC. This is not supported");
1390
1391 if (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
1392 file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
1393 else
1394 file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
1395
1396 file.setSwiftVersion((flags >> 8) & 0xFF);
1397
1398 return llvm::Error::success();
1399 }
1400
1401 /// Converts normalized mach-o file into an lld::File and lld::Atoms.
1402 llvm::Expected<std::unique_ptr<lld::File>>
objectToAtoms(const NormalizedFile & normalizedFile,StringRef path,bool copyRefs)1403 objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1404 bool copyRefs) {
1405 auto file = std::make_unique<MachOFile>(path);
1406 if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
1407 return std::move(ec);
1408 return std::unique_ptr<File>(std::move(file));
1409 }
1410
1411 llvm::Expected<std::unique_ptr<lld::File>>
dylibToAtoms(const NormalizedFile & normalizedFile,StringRef path,bool copyRefs)1412 dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1413 bool copyRefs) {
1414 // Instantiate SharedLibraryFile object.
1415 auto file = std::make_unique<MachODylibFile>(path);
1416 if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs))
1417 return std::move(ec);
1418 return std::unique_ptr<File>(std::move(file));
1419 }
1420
1421 } // anonymous namespace
1422
1423 namespace normalized {
1424
isObjCImageInfo(const Section & sect)1425 static bool isObjCImageInfo(const Section §) {
1426 return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") ||
1427 (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo");
1428 }
1429
1430 llvm::Error
normalizedObjectToAtoms(MachOFile * file,const NormalizedFile & normalizedFile,bool copyRefs)1431 normalizedObjectToAtoms(MachOFile *file,
1432 const NormalizedFile &normalizedFile,
1433 bool copyRefs) {
1434 LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
1435 << file->path() << "\n");
1436 bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
1437
1438 // Create atoms from each section.
1439 for (auto § : normalizedFile.sections) {
1440
1441 // If this is a debug-info section parse it specially.
1442 if (isDebugInfoSection(sect))
1443 continue;
1444
1445 // If the file contains an objc_image_info struct, then we should parse the
1446 // ObjC flags and Swift version.
1447 if (isObjCImageInfo(sect)) {
1448 if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
1449 return ec;
1450 // We then skip adding atoms for this section as we use the ObjCPass to
1451 // re-emit this data after it has been aggregated for all files.
1452 continue;
1453 }
1454
1455 bool customSectionName;
1456 DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
1457 customSectionName);
1458 if (auto ec = processSection(atomType, sect, customSectionName,
1459 normalizedFile, *file, scatterable, copyRefs))
1460 return ec;
1461 }
1462 // Create atoms from undefined symbols.
1463 for (auto &sym : normalizedFile.undefinedSymbols) {
1464 // Undefined symbols with n_value != 0 are actually tentative definitions.
1465 if (sym.value == Hex64(0)) {
1466 file->addUndefinedAtom(sym.name, copyRefs);
1467 } else {
1468 file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
1469 DefinedAtom::Alignment(1 << (sym.desc >> 8)),
1470 copyRefs);
1471 }
1472 }
1473
1474 // Convert mach-o relocations to References
1475 std::unique_ptr<mach_o::ArchHandler> handler
1476 = ArchHandler::create(normalizedFile.arch);
1477 for (auto § : normalizedFile.sections) {
1478 if (isDebugInfoSection(sect))
1479 continue;
1480 if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
1481 *file, *handler))
1482 return ec;
1483 }
1484
1485 // Add additional arch-specific References
1486 file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
1487 handler->addAdditionalReferences(*atom);
1488 });
1489
1490 // Each __eh_frame section needs references to both __text (the function we're
1491 // providing unwind info for) and itself (FDE -> CIE). These aren't
1492 // represented in the relocations on some architectures, so we have to add
1493 // them back in manually there.
1494 if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
1495 return ec;
1496
1497 // Process mach-o data-in-code regions array. That information is encoded in
1498 // atoms as References at each transition point.
1499 unsigned nextIndex = 0;
1500 for (const DataInCode &entry : normalizedFile.dataInCode) {
1501 ++nextIndex;
1502 const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
1503 if (!s) {
1504 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
1505 + Twine(entry.offset)
1506 + ") is not in any section"));
1507 }
1508 uint64_t offsetInSect = entry.offset - s->address;
1509 uint32_t offsetInAtom;
1510 MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
1511 &offsetInAtom);
1512 if (offsetInAtom + entry.length > atom->size()) {
1513 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
1514 "(offset="
1515 + Twine(entry.offset)
1516 + ", length="
1517 + Twine(entry.length)
1518 + ") crosses atom boundary."));
1519 }
1520 // Add reference that marks start of data-in-code.
1521 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1522 handler->dataInCodeTransitionStart(*atom),
1523 offsetInAtom, atom, entry.kind);
1524
1525 // Peek at next entry, if it starts where this one ends, skip ending ref.
1526 if (nextIndex < normalizedFile.dataInCode.size()) {
1527 const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
1528 if (nextEntry.offset == (entry.offset + entry.length))
1529 continue;
1530 }
1531
1532 // If data goes to end of function, skip ending ref.
1533 if ((offsetInAtom + entry.length) == atom->size())
1534 continue;
1535
1536 // Add reference that marks end of data-in-code.
1537 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1538 handler->dataInCodeTransitionEnd(*atom),
1539 offsetInAtom+entry.length, atom, 0);
1540 }
1541
1542 // Cache some attributes on the file for use later.
1543 file->setFlags(normalizedFile.flags);
1544 file->setArch(normalizedFile.arch);
1545 file->setOS(normalizedFile.os);
1546 file->setMinVersion(normalizedFile.minOSverson);
1547 file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
1548
1549 // Sort references in each atom to their canonical order.
1550 for (const DefinedAtom* defAtom : file->defined()) {
1551 reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
1552 }
1553
1554 if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
1555 return err;
1556
1557 return llvm::Error::success();
1558 }
1559
1560 llvm::Error
normalizedDylibToAtoms(MachODylibFile * file,const NormalizedFile & normalizedFile,bool copyRefs)1561 normalizedDylibToAtoms(MachODylibFile *file,
1562 const NormalizedFile &normalizedFile,
1563 bool copyRefs) {
1564 file->setInstallName(normalizedFile.installName);
1565 file->setCompatVersion(normalizedFile.compatVersion);
1566 file->setCurrentVersion(normalizedFile.currentVersion);
1567
1568 // Tell MachODylibFile object about all symbols it exports.
1569 if (!normalizedFile.exportInfo.empty()) {
1570 // If exports trie exists, use it instead of traditional symbol table.
1571 for (const Export &exp : normalizedFile.exportInfo) {
1572 bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1573 // StringRefs from export iterator are ephemeral, so force copy.
1574 file->addExportedSymbol(exp.name, weakDef, true);
1575 }
1576 } else {
1577 for (auto &sym : normalizedFile.globalSymbols) {
1578 assert((sym.scope & N_EXT) && "only expect external symbols here");
1579 bool weakDef = (sym.desc & N_WEAK_DEF);
1580 file->addExportedSymbol(sym.name, weakDef, copyRefs);
1581 }
1582 }
1583 // Tell MachODylibFile object about all dylibs it re-exports.
1584 for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
1585 if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB)
1586 file->addReExportedDylib(dep.path);
1587 }
1588 return llvm::Error::success();
1589 }
1590
relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,StringRef & segmentName,StringRef & sectionName,SectionType & sectionType,SectionAttr & sectionAttrs,bool & relocsToDefinedCanBeImplicit)1591 void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
1592 StringRef &segmentName,
1593 StringRef §ionName,
1594 SectionType §ionType,
1595 SectionAttr §ionAttrs,
1596 bool &relocsToDefinedCanBeImplicit) {
1597
1598 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
1599 p->atomType != DefinedAtom::typeUnknown; ++p) {
1600 if (p->atomType != atomType)
1601 continue;
1602 // Wild carded entries are ignored for reverse lookups.
1603 if (p->segmentName.empty() || p->sectionName.empty())
1604 continue;
1605 segmentName = p->segmentName;
1606 sectionName = p->sectionName;
1607 sectionType = p->sectionType;
1608 sectionAttrs = 0;
1609 relocsToDefinedCanBeImplicit = false;
1610 if (atomType == DefinedAtom::typeCode)
1611 sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
1612 if (atomType == DefinedAtom::typeCFI)
1613 relocsToDefinedCanBeImplicit = true;
1614 return;
1615 }
1616 llvm_unreachable("content type not yet supported");
1617 }
1618
1619 llvm::Expected<std::unique_ptr<lld::File>>
normalizedToAtoms(const NormalizedFile & normalizedFile,StringRef path,bool copyRefs)1620 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1621 bool copyRefs) {
1622 switch (normalizedFile.fileType) {
1623 case MH_DYLIB:
1624 case MH_DYLIB_STUB:
1625 return dylibToAtoms(normalizedFile, path, copyRefs);
1626 case MH_OBJECT:
1627 return objectToAtoms(normalizedFile, path, copyRefs);
1628 default:
1629 llvm_unreachable("unhandled MachO file type!");
1630 }
1631 }
1632
1633 } // namespace normalized
1634 } // namespace mach_o
1635 } // namespace lld
1636