1 //===-- LVBinaryReader.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the LVBinaryReader class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h"
14 #include "llvm/Support/Errc.h"
15 #include "llvm/Support/FormatAdapters.h"
16 #include "llvm/Support/FormatVariadic.h"
17
18 using namespace llvm;
19 using namespace llvm::logicalview;
20
21 #define DEBUG_TYPE "BinaryReader"
22
23 // Function names extracted from the object symbol table.
add(StringRef Name,LVScope * Function,LVSectionIndex SectionIndex)24 void LVSymbolTable::add(StringRef Name, LVScope *Function,
25 LVSectionIndex SectionIndex) {
26 std::string SymbolName(Name);
27 if (SymbolNames.find(SymbolName) == SymbolNames.end()) {
28 SymbolNames.emplace(
29 std::piecewise_construct, std::forward_as_tuple(SymbolName),
30 std::forward_as_tuple(Function, 0, SectionIndex, false));
31 } else {
32 // Update a recorded entry with its logical scope and section index.
33 SymbolNames[SymbolName].Scope = Function;
34 if (SectionIndex)
35 SymbolNames[SymbolName].SectionIndex = SectionIndex;
36 }
37
38 if (Function && SymbolNames[SymbolName].IsComdat)
39 Function->setIsComdat();
40
41 LLVM_DEBUG({ print(dbgs()); });
42 }
43
add(StringRef Name,LVAddress Address,LVSectionIndex SectionIndex,bool IsComdat)44 void LVSymbolTable::add(StringRef Name, LVAddress Address,
45 LVSectionIndex SectionIndex, bool IsComdat) {
46 std::string SymbolName(Name);
47 if (SymbolNames.find(SymbolName) == SymbolNames.end())
48 SymbolNames.emplace(
49 std::piecewise_construct, std::forward_as_tuple(SymbolName),
50 std::forward_as_tuple(nullptr, Address, SectionIndex, IsComdat));
51 else
52 // Update a recorded symbol name with its logical scope.
53 SymbolNames[SymbolName].Address = Address;
54
55 LVScope *Function = SymbolNames[SymbolName].Scope;
56 if (Function && IsComdat)
57 Function->setIsComdat();
58 LLVM_DEBUG({ print(dbgs()); });
59 }
60
update(LVScope * Function)61 LVSectionIndex LVSymbolTable::update(LVScope *Function) {
62 LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex();
63 StringRef Name = Function->getLinkageName();
64 if (Name.empty())
65 Name = Function->getName();
66 std::string SymbolName(Name);
67
68 if (SymbolName.empty() || (SymbolNames.find(SymbolName) == SymbolNames.end()))
69 return SectionIndex;
70
71 // Update a recorded entry with its logical scope, only if the scope has
72 // ranges. That is the case when in DWARF there are 2 DIEs connected via
73 // the DW_AT_specification.
74 if (Function->getHasRanges()) {
75 SymbolNames[SymbolName].Scope = Function;
76 SectionIndex = SymbolNames[SymbolName].SectionIndex;
77 } else {
78 SectionIndex = UndefinedSectionIndex;
79 }
80
81 if (SymbolNames[SymbolName].IsComdat)
82 Function->setIsComdat();
83
84 LLVM_DEBUG({ print(dbgs()); });
85 return SectionIndex;
86 }
87
getEntry(StringRef Name)88 const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) {
89 static LVSymbolTableEntry Empty = LVSymbolTableEntry();
90 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
91 return Iter != SymbolNames.end() ? Iter->second : Empty;
92 }
getAddress(StringRef Name)93 LVAddress LVSymbolTable::getAddress(StringRef Name) {
94 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
95 return Iter != SymbolNames.end() ? Iter->second.Address : 0;
96 }
getIndex(StringRef Name)97 LVSectionIndex LVSymbolTable::getIndex(StringRef Name) {
98 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
99 return Iter != SymbolNames.end() ? Iter->second.SectionIndex
100 : getReader().getDotTextSectionIndex();
101 }
getIsComdat(StringRef Name)102 bool LVSymbolTable::getIsComdat(StringRef Name) {
103 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
104 return Iter != SymbolNames.end() ? Iter->second.IsComdat : false;
105 }
106
print(raw_ostream & OS)107 void LVSymbolTable::print(raw_ostream &OS) {
108 OS << "Symbol Table\n";
109 for (LVSymbolNames::reference Entry : SymbolNames) {
110 LVSymbolTableEntry &SymbolName = Entry.second;
111 LVScope *Scope = SymbolName.Scope;
112 LVOffset Offset = Scope ? Scope->getOffset() : 0;
113 OS << "Index: " << hexValue(SymbolName.SectionIndex, 5)
114 << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N")
115 << " Scope: " << hexValue(Offset)
116 << " Address: " << hexValue(SymbolName.Address)
117 << " Name: " << Entry.first << "\n";
118 }
119 }
120
// Forward to the reader's symbol table: record 'Name' with the logical
// scope 'Function' and the given section index.
void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function,
                                      LVSectionIndex SectionIndex) {
  SymbolTable.add(Name, Function, SectionIndex);
}
// Forward to the reader's symbol table: record 'Name' with the given
// address, section index and comdat flag.
void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address,
                                      LVSectionIndex SectionIndex,
                                      bool IsComdat) {
  SymbolTable.add(Name, Address, SectionIndex, IsComdat);
}
// Forward to the reader's symbol table: update the entry that matches
// 'Function' and return the section index reported by the table.
LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) {
  return SymbolTable.update(Function);
}
133
// Forward to the reader's symbol table: return the entry for 'Name'.
const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) {
  return SymbolTable.getEntry(Name);
}
// Forward to the reader's symbol table: return the address for 'Name'.
LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) {
  return SymbolTable.getAddress(Name);
}
// Forward to the reader's symbol table: return the section index for 'Name'.
LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) {
  return SymbolTable.getIndex(Name);
}
// Forward to the reader's symbol table: return the comdat flag for 'Name'.
bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) {
  return SymbolTable.getIsComdat(Name);
}
146
mapVirtualAddress(const object::ObjectFile & Obj)147 void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) {
148 for (const object::SectionRef &Section : Obj.sections()) {
149 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
150 continue;
151
152 // Record section information required for symbol resolution.
153 // Note: The section index returned by 'getIndex()' is one based.
154 Sections.emplace(Section.getIndex(), Section);
155 addSectionAddress(Section);
156
157 // Identify the ".text" section.
158 Expected<StringRef> SectionNameOrErr = Section.getName();
159 if (!SectionNameOrErr) {
160 consumeError(SectionNameOrErr.takeError());
161 continue;
162 }
163 if ((*SectionNameOrErr).equals(".text") ||
164 (*SectionNameOrErr).equals(".code"))
165 DotTextSectionIndex = Section.getIndex();
166 }
167
168 // Process the symbol table.
169 mapRangeAddress(Obj);
170
171 LLVM_DEBUG({
172 dbgs() << "\nSections Information:\n";
173 for (LVSections::reference Entry : Sections) {
174 LVSectionIndex SectionIndex = Entry.first;
175 const object::SectionRef Section = Entry.second;
176 Expected<StringRef> SectionNameOrErr = Section.getName();
177 if (!SectionNameOrErr)
178 consumeError(SectionNameOrErr.takeError());
179 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
180 << " Name: " << *SectionNameOrErr << "\n"
181 << "Size: " << hexValue(Section.getSize()) << "\n"
182 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
183 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n";
184 }
185 dbgs() << "\nObject Section Information:\n";
186 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
187 dbgs() << "[" << hexValue(Entry.first) << ":"
188 << hexValue(Entry.first + Entry.second.getSize())
189 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
190 });
191 }
192
mapVirtualAddress(const object::COFFObjectFile & COFFObj)193 void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) {
194 ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase();
195 if (ImageBase)
196 ImageBaseAddress = ImageBase.get();
197
198 LLVM_DEBUG({
199 dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n";
200 });
201
202 uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT;
203
204 for (const object::SectionRef &Section : COFFObj.sections()) {
205 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
206 continue;
207
208 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
209 VirtualAddress = COFFSection->VirtualAddress;
210 bool IsComdat = (COFFSection->Characteristics & Flags) == Flags;
211
212 // Record section information required for symbol resolution.
213 // Note: The section index returned by 'getIndex()' is zero based.
214 Sections.emplace(Section.getIndex() + 1, Section);
215 addSectionAddress(Section);
216
217 // Additional initialization on the specific object format.
218 mapRangeAddress(COFFObj, Section, IsComdat);
219 }
220
221 LLVM_DEBUG({
222 dbgs() << "\nSections Information:\n";
223 for (LVSections::reference Entry : Sections) {
224 LVSectionIndex SectionIndex = Entry.first;
225 const object::SectionRef Section = Entry.second;
226 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
227 Expected<StringRef> SectionNameOrErr = Section.getName();
228 if (!SectionNameOrErr)
229 consumeError(SectionNameOrErr.takeError());
230 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
231 << " Name: " << *SectionNameOrErr << "\n"
232 << "Size: " << hexValue(Section.getSize()) << "\n"
233 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
234 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"
235 << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData)
236 << "\n"
237 << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData)
238 << "\n";
239 }
240 dbgs() << "\nObject Section Information:\n";
241 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
242 dbgs() << "[" << hexValue(Entry.first) << ":"
243 << hexValue(Entry.first + Entry.second.getSize())
244 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
245 });
246 }
247
loadGenericTargetInfo(StringRef TheTriple,StringRef TheFeatures)248 Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple,
249 StringRef TheFeatures) {
250 std::string TargetLookupError;
251 const Target *TheTarget =
252 TargetRegistry::lookupTarget(std::string(TheTriple), TargetLookupError);
253 if (!TheTarget)
254 return createStringError(errc::invalid_argument, TargetLookupError.c_str());
255
256 // Register information.
257 MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple);
258 if (!RegisterInfo)
259 return createStringError(errc::invalid_argument,
260 "no register info for target " + TheTriple);
261 MRI.reset(RegisterInfo);
262
263 // Assembler properties and features.
264 MCTargetOptions MCOptions;
265 MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions));
266 if (!AsmInfo)
267 return createStringError(errc::invalid_argument,
268 "no assembly info for target " + TheTriple);
269 MAI.reset(AsmInfo);
270
271 // Target subtargets.
272 StringRef CPU;
273 MCSubtargetInfo *SubtargetInfo(
274 TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures));
275 if (!SubtargetInfo)
276 return createStringError(errc::invalid_argument,
277 "no subtarget info for target " + TheTriple);
278 STI.reset(SubtargetInfo);
279
280 // Instructions Info.
281 MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo());
282 if (!InstructionInfo)
283 return createStringError(errc::invalid_argument,
284 "no instruction info for target " + TheTriple);
285 MII.reset(InstructionInfo);
286
287 MC = std::make_unique<MCContext>(Triple(TheTriple), MAI.get(), MRI.get(),
288 STI.get());
289
290 // Assembler.
291 MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC));
292 if (!DisAsm)
293 return createStringError(errc::invalid_argument,
294 "no disassembler for target " + TheTriple);
295 MD.reset(DisAsm);
296
297 MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter(
298 Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI));
299 if (!InstructionPrinter)
300 return createStringError(errc::invalid_argument,
301 "no target assembly language printer for target " +
302 TheTriple);
303 MIP.reset(InstructionPrinter);
304 InstructionPrinter->setPrintImmHex(true);
305
306 return Error::success();
307 }
308
309 Expected<std::pair<uint64_t, object::SectionRef>>
getSection(LVScope * Scope,LVAddress Address,LVSectionIndex SectionIndex)310 LVBinaryReader::getSection(LVScope *Scope, LVAddress Address,
311 LVSectionIndex SectionIndex) {
312 // Return the 'text' section with the code for this logical scope.
313 // COFF: SectionIndex is zero. Use 'SectionAddresses' data.
314 // ELF: SectionIndex is the section index in the file.
315 if (SectionIndex) {
316 LVSections::iterator Iter = Sections.find(SectionIndex);
317 if (Iter == Sections.end()) {
318 return createStringError(errc::invalid_argument,
319 "invalid section index for: '%s'",
320 Scope->getName().str().c_str());
321 }
322 const object::SectionRef Section = Iter->second;
323 return std::make_pair(Section.getAddress(), Section);
324 }
325
326 // Ensure a valid starting address for the public names.
327 LVSectionAddresses::const_iterator Iter =
328 SectionAddresses.upper_bound(Address);
329 if (Iter == SectionAddresses.begin())
330 return createStringError(errc::invalid_argument,
331 "invalid section address for: '%s'",
332 Scope->getName().str().c_str());
333
334 // Get section that contains the code for this function.
335 Iter = SectionAddresses.lower_bound(Address);
336 if (Iter != SectionAddresses.begin())
337 --Iter;
338 return std::make_pair(Iter->first, Iter->second);
339 }
340
addSectionRange(LVSectionIndex SectionIndex,LVScope * Scope)341 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex,
342 LVScope *Scope) {
343 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
344 ScopesWithRanges->addEntry(Scope);
345 }
346
addSectionRange(LVSectionIndex SectionIndex,LVScope * Scope,LVAddress LowerAddress,LVAddress UpperAddress)347 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex,
348 LVScope *Scope, LVAddress LowerAddress,
349 LVAddress UpperAddress) {
350 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
351 ScopesWithRanges->addEntry(Scope, LowerAddress, UpperAddress);
352 }
353
getSectionRanges(LVSectionIndex SectionIndex)354 LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) {
355 // Check if we already have a mapping for this section index.
356 LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex);
357 if (IterSection == SectionRanges.end())
358 IterSection =
359 SectionRanges.emplace(SectionIndex, std::make_unique<LVRange>()).first;
360 LVRange *Range = IterSection->second.get();
361 assert(Range && "Range is null.");
362 return Range;
363 }
364
createInstructions(LVScope * Scope,LVSectionIndex SectionIndex,const LVNameInfo & NameInfo)365 Error LVBinaryReader::createInstructions(LVScope *Scope,
366 LVSectionIndex SectionIndex,
367 const LVNameInfo &NameInfo) {
368 assert(Scope && "Scope is null.");
369
370 // Skip stripped functions.
371 if (Scope->getIsDiscarded())
372 return Error::success();
373
374 // Find associated address and size for the given function entry point.
375 LVAddress Address = NameInfo.first;
376 uint64_t Size = NameInfo.second;
377
378 LLVM_DEBUG({
379 dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '"
380 << Scope->getLinkageName() << "'\n"
381 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
382 << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n";
383 });
384
385 Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr =
386 getSection(Scope, Address, SectionIndex);
387 if (!SectionOrErr)
388 return SectionOrErr.takeError();
389 const object::SectionRef Section = (*SectionOrErr).second;
390 uint64_t SectionAddress = (*SectionOrErr).first;
391
392 Expected<StringRef> SectionContentsOrErr = Section.getContents();
393 if (!SectionContentsOrErr)
394 return SectionOrErr.takeError();
395
396 // There are cases where the section size is smaller than the [LowPC,HighPC]
397 // range; it causes us to decode invalid addresses. The recorded size in the
398 // logical scope is one less than the real size.
399 LLVM_DEBUG({
400 dbgs() << " Size: " << hexValue(Size)
401 << ", Section Size: " << hexValue(Section.getSize()) << "\n";
402 });
403 Size = std::min(Size + 1, Section.getSize());
404
405 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr);
406 uint64_t Offset = Address - SectionAddress;
407 uint8_t const *Begin = Bytes.data() + Offset;
408 uint8_t const *End = Bytes.data() + Offset + Size;
409
410 LLVM_DEBUG({
411 Expected<StringRef> SectionNameOrErr = Section.getName();
412 if (!SectionNameOrErr)
413 consumeError(SectionNameOrErr.takeError());
414 else
415 dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " ["
416 << hexValue((uint64_t)Section.getAddress()) << ":"
417 << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10)
418 << "] Name: '" << *SectionNameOrErr << "'\n"
419 << "Begin: " << hexValue((uint64_t)Begin)
420 << ", End: " << hexValue((uint64_t)End) << "\n";
421 });
422
423 // Address for first instruction line.
424 LVAddress FirstAddress = Address;
425 auto InstructionsSP = std::make_unique<LVLines>();
426 LVLines &Instructions = *InstructionsSP;
427 DiscoveredLines.emplace_back(std::move(InstructionsSP));
428
429 while (Begin < End) {
430 MCInst Instruction;
431 uint64_t BytesConsumed = 0;
432 SmallVector<char, 64> InsnStr;
433 raw_svector_ostream Annotations(InsnStr);
434 MCDisassembler::DecodeStatus const S =
435 MD->getInstruction(Instruction, BytesConsumed,
436 ArrayRef<uint8_t>(Begin, End), Address, outs());
437 switch (S) {
438 case MCDisassembler::Fail:
439 LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; });
440 if (BytesConsumed == 0)
441 // Skip invalid bytes
442 BytesConsumed = 1;
443 break;
444 case MCDisassembler::SoftFail:
445 LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; });
446 [[fallthrough]];
447 case MCDisassembler::Success: {
448 std::string Buffer;
449 raw_string_ostream Stream(Buffer);
450 StringRef AnnotationsStr = Annotations.str();
451 MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream);
452 LLVM_DEBUG({
453 std::string BufferCodes;
454 raw_string_ostream StreamCodes(BufferCodes);
455 StreamCodes << format_bytes(
456 ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16,
457 16);
458 dbgs() << "[" << hexValue((uint64_t)Begin) << "] "
459 << "Size: " << format_decimal(BytesConsumed, 2) << " ("
460 << formatv("{0}",
461 fmt_align(StreamCodes.str(), AlignStyle::Left, 32))
462 << ") " << hexValue((uint64_t)Address) << ": " << Stream.str()
463 << "\n";
464 });
465 // Here we add logical lines to the Instructions. Later on,
466 // the 'processLines()' function will move each created logical line
467 // to its enclosing logical scope, using the debug ranges information
468 // and they will be released when its scope parent is deleted.
469 LVLineAssembler *Line = createLineAssembler();
470 Line->setAddress(Address);
471 Line->setName(StringRef(Stream.str()).trim());
472 Instructions.push_back(Line);
473 break;
474 }
475 }
476 Address += BytesConsumed;
477 Begin += BytesConsumed;
478 }
479
480 LLVM_DEBUG({
481 size_t Index = 0;
482 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
483 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
484 << "Address: " << hexValue(FirstAddress)
485 << format(" - Collected instructions lines: %d\n",
486 Instructions.size());
487 for (const LVLine *Line : Instructions)
488 dbgs() << format_decimal(++Index, 5) << ": "
489 << hexValue(Line->getOffset()) << ", (" << Line->getName()
490 << ")\n";
491 });
492
493 // The scope in the assembler names is linked to its own instructions.
494 ScopeInstructions.add(SectionIndex, Scope, &Instructions);
495 AssemblerMappings.add(SectionIndex, FirstAddress, Scope);
496
497 return Error::success();
498 }
499
createInstructions(LVScope * Function,LVSectionIndex SectionIndex)500 Error LVBinaryReader::createInstructions(LVScope *Function,
501 LVSectionIndex SectionIndex) {
502 if (!options().getPrintInstructions())
503 return Error::success();
504
505 LVNameInfo Name = CompileUnit->findPublicName(Function);
506 if (Name.first != LVAddress(UINT64_MAX))
507 return createInstructions(Function, SectionIndex, Name);
508
509 return Error::success();
510 }
511
createInstructions()512 Error LVBinaryReader::createInstructions() {
513 if (!options().getPrintInstructions())
514 return Error::success();
515
516 LLVM_DEBUG({
517 size_t Index = 1;
518 dbgs() << "\nPublic Names (Scope):\n";
519 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
520 LVScope *Scope = Name.first;
521 const LVNameInfo &NameInfo = Name.second;
522 LVAddress Address = NameInfo.first;
523 uint64_t Size = NameInfo.second;
524 dbgs() << format_decimal(Index++, 5) << ": "
525 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
526 << hexValue(Address) << ":" << hexValue(Address + Size) << "] "
527 << "Name: '" << Scope->getName() << "' / '"
528 << Scope->getLinkageName() << "'\n";
529 }
530 });
531
532 // For each public name in the current compile unit, create the line
533 // records that represent the executable instructions.
534 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
535 LVScope *Scope = Name.first;
536 // The symbol table extracted from the object file always contains a
537 // non-empty name (linkage name). However, the logical scope does not
538 // guarantee to have a name for the linkage name (main is one case).
539 // For those cases, set the linkage name the same as the name.
540 if (!Scope->getLinkageNameIndex())
541 Scope->setLinkageName(Scope->getName());
542 LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName());
543 if (Error Err = createInstructions(Scope, SectionIndex, Name.second))
544 return Err;
545 }
546
547 return Error::success();
548 }
549
// During the traversal of the debug information sections, we created the
// logical lines representing the disassembled instructions from the text
// section and the logical lines representing the line records from the
// debug line section. Using the ranges associated with the logical scopes,
// we will allocate those logical lines to their logical scopes.
//
//   DebugLines: line records to process; must not be null. The collected
//     instruction lines are merged into this container.
//   SectionIndex: section used to look up the instruction lines and the
//     scope ranges.
//   Function: when not null, the scope whose instruction lines are merged;
//     when null, the scope is looked up by the address of each debug line.
void LVBinaryReader::processLines(LVLines *DebugLines,
                                  LVSectionIndex SectionIndex,
                                  LVScope *Function) {
  assert(DebugLines && "DebugLines is null.");

  // Just return if this compilation unit does not have any line records
  // and no instruction lines were created.
  if (DebugLines->empty() && !options().getPrintInstructions())
    return;

  // Merge the debug lines and instruction lines using their text address;
  // the logical line representing the debug line record is followed by the
  // line(s) representing the disassembled instructions, whose addresses are
  // equal or greater that the line address and less than the address of the
  // next debug line record.
  LLVM_DEBUG({
    size_t Index = 1;
    size_t PerLine = 4;
    dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size());
    for (const LVLine *Line : *DebugLines) {
      dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset())
             << ", (" << Line->getLineNumber() << ")"
             << ((Index % PerLine) ? " " : "\n");
      ++Index;
    }
    dbgs() << ((Index % PerLine) ? "\n" : "");
  });

  // 'TraverseLines' is cleared once the end of the debug lines is reached;
  // from then on the remaining instruction lines are simply appended.
  bool TraverseLines = true;
  LVLines::iterator Iter = DebugLines->begin();
  while (TraverseLines && Iter != DebugLines->end()) {
    uint64_t DebugAddress = (*Iter)->getAddress();

    // Get the function with an entry point that matches this line and
    // its associated assembler entries. In the case of COMDAT, the input
    // 'Function' is not null. Use it to find its address ranges.
    LVScope *Scope = Function;
    if (!Function) {
      Scope = AssemblerMappings.find(SectionIndex, DebugAddress);
      if (!Scope) {
        // No function starts at this debug line; try the next one.
        ++Iter;
        continue;
      }
    }

    // Get the associated instructions for the found 'Scope'.
    // The lines are moved out of the container returned by the lookup, so
    // that container is left in a moved-from state.
    LVLines InstructionLines;
    LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope);
    if (Lines)
      InstructionLines = std::move(*Lines);

    LLVM_DEBUG({
      size_t Index = 0;
      dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
             << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
             << format("Process instruction lines: %d\n",
                       InstructionLines.size());
      for (const LVLine *Line : InstructionLines)
        dbgs() << format_decimal(++Index, 5) << ": "
               << hexValue(Line->getOffset()) << ", (" << Line->getName()
               << ")\n";
    });

    // Continue with next debug line if there are not instructions lines.
    if (InstructionLines.empty()) {
      ++Iter;
      continue;
    }

    for (LVLine *InstructionLine : InstructionLines) {
      uint64_t InstructionAddress = InstructionLine->getAddress();
      LLVM_DEBUG({
        dbgs() << "Instruction address: " << hexValue(InstructionAddress)
               << "\n";
      });
      if (TraverseLines) {
        // Advance over the lines whose address is not greater than the
        // instruction address and insert the instruction before the first
        // line with a greater address.
        while (Iter != DebugLines->end()) {
          DebugAddress = (*Iter)->getAddress();
          LLVM_DEBUG({
            bool IsDebug = (*Iter)->getIsLineDebug();
            dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " ["
                   << hexValue(DebugAddress) << "]";
            if (IsDebug)
              dbgs() << format(" %d", (*Iter)->getLineNumber());
            dbgs() << "\n";
          });
          // Instruction address before debug line.
          if (InstructionAddress < DebugAddress) {
            LLVM_DEBUG({
              dbgs() << "Inserted instruction address: "
                     << hexValue(InstructionAddress) << " before line: "
                     << format("%d", (*Iter)->getLineNumber()) << " ["
                     << hexValue(DebugAddress) << "]\n";
            });
            // The iterator is refreshed from the value returned by 'insert'.
            Iter = DebugLines->insert(Iter, InstructionLine);
            // The returned iterator points to the inserted instruction.
            // Skip it and point to the line acting as reference.
            ++Iter;
            break;
          }
          ++Iter;
        }
        if (Iter == DebugLines->end()) {
          // We have reached the end of the source lines and the current
          // instruction line address is greater than the last source line.
          TraverseLines = false;
          DebugLines->push_back(InstructionLine);
        }
      } else {
        // Past the last debug line: keep the remaining instructions in order.
        DebugLines->push_back(InstructionLine);
      }
    }
  }

  LLVM_DEBUG({
    dbgs() << format("Lines after merge: %d\n", DebugLines->size());
    size_t Index = 0;
    for (const LVLine *Line : *DebugLines) {
      dbgs() << format_decimal(++Index, 5) << ": "
             << hexValue(Line->getOffset()) << ", ("
             << ((Line->getIsLineDebug())
                     ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
                     : Line->getName())
             << ")\n";
    }
  });

  // If this compilation unit does not have line records, traverse its scopes
  // and take any collected instruction lines as the working set in order
  // to move them to their associated scope.
  if (DebugLines->empty()) {
    if (const LVScopes *Scopes = CompileUnit->getScopes())
      for (LVScope *Scope : *Scopes) {
        LVLines *Lines = ScopeInstructions.find(Scope);
        if (Lines) {

          LLVM_DEBUG({
            size_t Index = 0;
            dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
                   << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
                   << format("Instruction lines: %d\n", Lines->size());
            for (const LVLine *Line : *Lines)
              dbgs() << format_decimal(++Index, 5) << ": "
                     << hexValue(Line->getOffset()) << ", (" << Line->getName()
                     << ")\n";
          });

          if (Scope->getIsArtificial()) {
            // Add the instruction lines to their artificial scope.
            for (LVLine *Line : *Lines)
              Scope->addElement(Line);
          } else {
            // Lines of the other scopes are distributed by address below.
            DebugLines->append(*Lines);
          }
          Lines->clear();
        }
      }
  }

  // Address based scope lookups are bracketed by startSearch()/endSearch().
  LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
  ScopesWithRanges->startSearch();

  // Process collected lines.
  LVScope *Scope;
  for (LVLine *Line : *DebugLines) {
    // Using the current line address, get its associated lexical scope and
    // add the line information to it.
    Scope = ScopesWithRanges->getEntry(Line->getAddress());
    if (!Scope) {
      // If missing scope, use the compile unit.
      Scope = CompileUnit;
      LLVM_DEBUG({
        dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", ("
               << ((Line->getIsLineDebug())
                       ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
                       : Line->getName())
               << ")\n";
      });
    }

    // Add line object to scope.
    Scope->addElement(Line);

    // Report any line zero.
    if (options().getWarningLines() && Line->getIsLineDebug() &&
        !Line->getLineNumber())
      CompileUnit->addLineZero(Line);

    // Some compilers generate ranges in the compile unit; other compilers
    // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global
    // variables, we need to generate the map ranges for the compile unit.
    // If we use the ranges stored at the scope level, there are cases where
    // the address referenced by a symbol location, is not in the enclosing
    // scope, but in an outer one. By using the ranges stored in the compile
    // unit, we can catch all those addresses.
    if (Line->getIsLineDebug())
      CompileUnit->addMapping(Line, SectionIndex);

    // Resolve any given pattern.
    patterns().resolvePatternMatch(Line);
  }

  ScopesWithRanges->endSearch();
}
759
processLines(LVLines * DebugLines,LVSectionIndex SectionIndex)760 void LVBinaryReader::processLines(LVLines *DebugLines,
761 LVSectionIndex SectionIndex) {
762 assert(DebugLines && "DebugLines is null.");
763 if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex))
764 return;
765
766 // If the Compile Unit does not contain comdat functions, use the whole
767 // set of debug lines, as the addresses don't have conflicts.
768 if (!CompileUnit->getHasComdatScopes()) {
769 processLines(DebugLines, SectionIndex, nullptr);
770 return;
771 }
772
773 // Find the indexes for the lines whose address is zero.
774 std::vector<size_t> AddressZero;
775 LVLines::iterator It =
776 std::find_if(std::begin(*DebugLines), std::end(*DebugLines),
777 [](LVLine *Line) { return !Line->getAddress(); });
778 while (It != std::end(*DebugLines)) {
779 AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It));
780 It = std::find_if(std::next(It), std::end(*DebugLines),
781 [](LVLine *Line) { return !Line->getAddress(); });
782 }
783
784 // If the set of debug lines does not contain any line with address zero,
785 // use the whole set. It means we are dealing with an initialization
786 // section from a fully linked binary.
787 if (AddressZero.empty()) {
788 processLines(DebugLines, SectionIndex, nullptr);
789 return;
790 }
791
792 // The Compile unit contains comdat functions. Traverse the collected
793 // debug lines and identify logical groups based on their start and
794 // address. Each group starts with a zero address.
795 // Begin, End, Address, IsDone.
796 using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>;
797 std::vector<LVBucket> Buckets;
798
799 LVAddress Address;
800 size_t Begin = 0;
801 size_t End = 0;
802 size_t Index = 0;
803 for (Index = 0; Index < AddressZero.size() - 1; ++Index) {
804 Begin = AddressZero[Index];
805 End = AddressZero[Index + 1] - 1;
806 Address = (*DebugLines)[End]->getAddress();
807 Buckets.emplace_back(Begin, End, Address, false);
808 }
809
810 // Add the last bucket.
811 if (Index) {
812 Begin = AddressZero[Index];
813 End = DebugLines->size() - 1;
814 Address = (*DebugLines)[End]->getAddress();
815 Buckets.emplace_back(Begin, End, Address, false);
816 }
817
818 LLVM_DEBUG({
819 dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n";
820 for (LVBucket &Bucket : Buckets) {
821 dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", "
822 << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", "
823 << "Address: " << hexValue(std::get<2>(Bucket)) << "\n";
824 }
825 });
826
827 // Traverse the sections and buckets looking for matches on the section
828 // sizes. In the unlikely event of different buckets with the same size
829 // process them in order and mark them as done.
830 LVLines Group;
831 for (LVSections::reference Entry : Sections) {
832 LVSectionIndex SectionIndex = Entry.first;
833 const object::SectionRef Section = Entry.second;
834 uint64_t Size = Section.getSize();
835 LLVM_DEBUG({
836 dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3)
837 << " , Section Size: " << hexValue(Section.getSize())
838 << " , Section Address: " << hexValue(Section.getAddress())
839 << "\n";
840 });
841
842 for (LVBucket &Bucket : Buckets) {
843 if (std::get<3>(Bucket))
844 // Already done for previous section.
845 continue;
846 if (Size == std::get<2>(Bucket)) {
847 // We have a match on the section size.
848 Group.clear();
849 LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket);
850 LVLines::iterator IterEnd =
851 DebugLines->begin() + std::get<1>(Bucket) + 1;
852 for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter)
853 Group.push_back(*Iter);
854 processLines(&Group, SectionIndex, /*Function=*/nullptr);
855 std::get<3>(Bucket) = true;
856 break;
857 }
858 }
859 }
860 }
861
862 // Traverse the scopes for the given 'Function' looking for any inlined
863 // scopes with inlined lines, which are found in 'CUInlineeLines'.
includeInlineeLines(LVSectionIndex SectionIndex,LVScope * Function)864 void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex,
865 LVScope *Function) {
866 SmallVector<LVInlineeLine::iterator> InlineeIters;
867 std::function<void(LVScope * Parent)> FindInlinedScopes =
868 [&](LVScope *Parent) {
869 if (const LVScopes *Scopes = Parent->getScopes())
870 for (LVScope *Scope : *Scopes) {
871 LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope);
872 if (Iter != CUInlineeLines.end())
873 InlineeIters.push_back(Iter);
874 FindInlinedScopes(Scope);
875 }
876 };
877
878 // Find all inlined scopes for the given 'Function'.
879 FindInlinedScopes(Function);
880 for (LVInlineeLine::iterator InlineeIter : InlineeIters) {
881 LVScope *Scope = InlineeIter->first;
882 addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex);
883
884 // TODO: Convert this into a reference.
885 LVLines *InlineeLines = InlineeIter->second.get();
886 LLVM_DEBUG({
887 dbgs() << "Inlined lines for: " << Scope->getName() << "\n";
888 for (const LVLine *Line : *InlineeLines)
889 dbgs() << "[" << hexValue(Line->getAddress()) << "] "
890 << Line->getLineNumber() << "\n";
891 dbgs() << format("Debug lines: %d\n", CULines.size());
892 for (const LVLine *Line : CULines)
893 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
894 << Line->getLineNumber() << ")\n";
895 ;
896 });
897
898 // The inlined lines must be merged using its address, in order to keep
899 // the real order of the instructions. The inlined lines are mixed with
900 // the other non-inlined lines.
901 if (InlineeLines->size()) {
902 // First address of inlinee code.
903 uint64_t InlineeStart = (InlineeLines->front())->getAddress();
904 LVLines::iterator Iter = std::find_if(
905 CULines.begin(), CULines.end(), [&](LVLine *Item) -> bool {
906 return Item->getAddress() == InlineeStart;
907 });
908 if (Iter != CULines.end()) {
909 // 'Iter' points to the line where the inlined function is called.
910 // Emulate the DW_AT_call_line attribute.
911 Scope->setCallLineNumber((*Iter)->getLineNumber());
912 // Mark the referenced line as the start of the inlined function.
913 // Skip the first line during the insertion, as the address and
914 // line number as the same. Otherwise we have to erase and insert.
915 (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber());
916 ++Iter;
917 CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end());
918 }
919 }
920
921 // Remove this set of lines from the container; each inlined function
922 // creates an unique set of lines. Remove only the created container.
923 CUInlineeLines.erase(InlineeIter);
924 InlineeLines->clear();
925 }
926 LLVM_DEBUG({
927 dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n";
928 dbgs() << format("Debug lines: %d\n", CULines.size());
929 for (const LVLine *Line : CULines)
930 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
931 << Line->getLineNumber() << ")\n";
932 ;
933 });
934 }
935
print(raw_ostream & OS) const936 void LVBinaryReader::print(raw_ostream &OS) const {
937 OS << "LVBinaryReader\n";
938 LLVM_DEBUG(dbgs() << "PrintReader\n");
939 }
940