1 //===-- LVBinaryReader.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the LVBinaryReader class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h"
14 #include "llvm/Support/Errc.h"
15 #include "llvm/Support/FormatAdapters.h"
16 #include "llvm/Support/FormatVariadic.h"
17 
18 using namespace llvm;
19 using namespace llvm::logicalview;
20 
21 #define DEBUG_TYPE "BinaryReader"
22 
23 // Function names extracted from the object symbol table.
add(StringRef Name,LVScope * Function,LVSectionIndex SectionIndex)24 void LVSymbolTable::add(StringRef Name, LVScope *Function,
25                         LVSectionIndex SectionIndex) {
26   std::string SymbolName(Name);
27   if (SymbolNames.find(SymbolName) == SymbolNames.end()) {
28     SymbolNames.emplace(
29         std::piecewise_construct, std::forward_as_tuple(SymbolName),
30         std::forward_as_tuple(Function, 0, SectionIndex, false));
31   } else {
32     // Update a recorded entry with its logical scope and section index.
33     SymbolNames[SymbolName].Scope = Function;
34     if (SectionIndex)
35       SymbolNames[SymbolName].SectionIndex = SectionIndex;
36   }
37 
38   if (Function && SymbolNames[SymbolName].IsComdat)
39     Function->setIsComdat();
40 
41   LLVM_DEBUG({ print(dbgs()); });
42 }
43 
add(StringRef Name,LVAddress Address,LVSectionIndex SectionIndex,bool IsComdat)44 void LVSymbolTable::add(StringRef Name, LVAddress Address,
45                         LVSectionIndex SectionIndex, bool IsComdat) {
46   std::string SymbolName(Name);
47   if (SymbolNames.find(SymbolName) == SymbolNames.end())
48     SymbolNames.emplace(
49         std::piecewise_construct, std::forward_as_tuple(SymbolName),
50         std::forward_as_tuple(nullptr, Address, SectionIndex, IsComdat));
51   else
52     // Update a recorded symbol name with its logical scope.
53     SymbolNames[SymbolName].Address = Address;
54 
55   LVScope *Function = SymbolNames[SymbolName].Scope;
56   if (Function && IsComdat)
57     Function->setIsComdat();
58   LLVM_DEBUG({ print(dbgs()); });
59 }
60 
update(LVScope * Function)61 LVSectionIndex LVSymbolTable::update(LVScope *Function) {
62   LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex();
63   StringRef Name = Function->getLinkageName();
64   if (Name.empty())
65     Name = Function->getName();
66   std::string SymbolName(Name);
67 
68   if (SymbolName.empty() || (SymbolNames.find(SymbolName) == SymbolNames.end()))
69     return SectionIndex;
70 
71   // Update a recorded entry with its logical scope, only if the scope has
72   // ranges. That is the case when in DWARF there are 2 DIEs connected via
73   // the DW_AT_specification.
74   if (Function->getHasRanges()) {
75     SymbolNames[SymbolName].Scope = Function;
76     SectionIndex = SymbolNames[SymbolName].SectionIndex;
77   } else {
78     SectionIndex = UndefinedSectionIndex;
79   }
80 
81   if (SymbolNames[SymbolName].IsComdat)
82     Function->setIsComdat();
83 
84   LLVM_DEBUG({ print(dbgs()); });
85   return SectionIndex;
86 }
87 
getEntry(StringRef Name)88 const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) {
89   static LVSymbolTableEntry Empty = LVSymbolTableEntry();
90   LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
91   return Iter != SymbolNames.end() ? Iter->second : Empty;
92 }
getAddress(StringRef Name)93 LVAddress LVSymbolTable::getAddress(StringRef Name) {
94   LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
95   return Iter != SymbolNames.end() ? Iter->second.Address : 0;
96 }
getIndex(StringRef Name)97 LVSectionIndex LVSymbolTable::getIndex(StringRef Name) {
98   LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
99   return Iter != SymbolNames.end() ? Iter->second.SectionIndex
100                                    : getReader().getDotTextSectionIndex();
101 }
getIsComdat(StringRef Name)102 bool LVSymbolTable::getIsComdat(StringRef Name) {
103   LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
104   return Iter != SymbolNames.end() ? Iter->second.IsComdat : false;
105 }
106 
print(raw_ostream & OS)107 void LVSymbolTable::print(raw_ostream &OS) {
108   OS << "Symbol Table\n";
109   for (LVSymbolNames::reference Entry : SymbolNames) {
110     LVSymbolTableEntry &SymbolName = Entry.second;
111     LVScope *Scope = SymbolName.Scope;
112     LVOffset Offset = Scope ? Scope->getOffset() : 0;
113     OS << "Index: " << hexValue(SymbolName.SectionIndex, 5)
114        << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N")
115        << " Scope: " << hexValue(Offset)
116        << " Address: " << hexValue(SymbolName.Address)
117        << " Name: " << Entry.first << "\n";
118   }
119 }
120 
addToSymbolTable(StringRef Name,LVScope * Function,LVSectionIndex SectionIndex)121 void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function,
122                                       LVSectionIndex SectionIndex) {
123   SymbolTable.add(Name, Function, SectionIndex);
124 }
addToSymbolTable(StringRef Name,LVAddress Address,LVSectionIndex SectionIndex,bool IsComdat)125 void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address,
126                                       LVSectionIndex SectionIndex,
127                                       bool IsComdat) {
128   SymbolTable.add(Name, Address, SectionIndex, IsComdat);
129 }
updateSymbolTable(LVScope * Function)130 LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) {
131   return SymbolTable.update(Function);
132 }
133 
getSymbolTableEntry(StringRef Name)134 const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) {
135   return SymbolTable.getEntry(Name);
136 }
getSymbolTableAddress(StringRef Name)137 LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) {
138   return SymbolTable.getAddress(Name);
139 }
getSymbolTableIndex(StringRef Name)140 LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) {
141   return SymbolTable.getIndex(Name);
142 }
getSymbolTableIsComdat(StringRef Name)143 bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) {
144   return SymbolTable.getIsComdat(Name);
145 }
146 
mapVirtualAddress(const object::ObjectFile & Obj)147 void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) {
148   for (const object::SectionRef &Section : Obj.sections()) {
149     if (!Section.isText() || Section.isVirtual() || !Section.getSize())
150       continue;
151 
152     // Record section information required for symbol resolution.
153     // Note: The section index returned by 'getIndex()' is one based.
154     Sections.emplace(Section.getIndex(), Section);
155     addSectionAddress(Section);
156 
157     // Identify the ".text" section.
158     Expected<StringRef> SectionNameOrErr = Section.getName();
159     if (!SectionNameOrErr) {
160       consumeError(SectionNameOrErr.takeError());
161       continue;
162     }
163     if ((*SectionNameOrErr).equals(".text") ||
164         (*SectionNameOrErr).equals(".code"))
165       DotTextSectionIndex = Section.getIndex();
166   }
167 
168   // Process the symbol table.
169   mapRangeAddress(Obj);
170 
171   LLVM_DEBUG({
172     dbgs() << "\nSections Information:\n";
173     for (LVSections::reference Entry : Sections) {
174       LVSectionIndex SectionIndex = Entry.first;
175       const object::SectionRef Section = Entry.second;
176       Expected<StringRef> SectionNameOrErr = Section.getName();
177       if (!SectionNameOrErr)
178         consumeError(SectionNameOrErr.takeError());
179       dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
180              << " Name: " << *SectionNameOrErr << "\n"
181              << "Size: " << hexValue(Section.getSize()) << "\n"
182              << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
183              << "SectionAddress: " << hexValue(Section.getAddress()) << "\n";
184     }
185     dbgs() << "\nObject Section Information:\n";
186     for (LVSectionAddresses::const_reference Entry : SectionAddresses)
187       dbgs() << "[" << hexValue(Entry.first) << ":"
188              << hexValue(Entry.first + Entry.second.getSize())
189              << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
190   });
191 }
192 
mapVirtualAddress(const object::COFFObjectFile & COFFObj)193 void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) {
194   ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase();
195   if (ImageBase)
196     ImageBaseAddress = ImageBase.get();
197 
198   LLVM_DEBUG({
199     dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n";
200   });
201 
202   uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT;
203 
204   for (const object::SectionRef &Section : COFFObj.sections()) {
205     if (!Section.isText() || Section.isVirtual() || !Section.getSize())
206       continue;
207 
208     const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
209     VirtualAddress = COFFSection->VirtualAddress;
210     bool IsComdat = (COFFSection->Characteristics & Flags) == Flags;
211 
212     // Record section information required for symbol resolution.
213     // Note: The section index returned by 'getIndex()' is zero based.
214     Sections.emplace(Section.getIndex() + 1, Section);
215     addSectionAddress(Section);
216 
217     // Additional initialization on the specific object format.
218     mapRangeAddress(COFFObj, Section, IsComdat);
219   }
220 
221   LLVM_DEBUG({
222     dbgs() << "\nSections Information:\n";
223     for (LVSections::reference Entry : Sections) {
224       LVSectionIndex SectionIndex = Entry.first;
225       const object::SectionRef Section = Entry.second;
226       const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
227       Expected<StringRef> SectionNameOrErr = Section.getName();
228       if (!SectionNameOrErr)
229         consumeError(SectionNameOrErr.takeError());
230       dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
231              << " Name: " << *SectionNameOrErr << "\n"
232              << "Size: " << hexValue(Section.getSize()) << "\n"
233              << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
234              << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"
235              << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData)
236              << "\n"
237              << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData)
238              << "\n";
239     }
240     dbgs() << "\nObject Section Information:\n";
241     for (LVSectionAddresses::const_reference Entry : SectionAddresses)
242       dbgs() << "[" << hexValue(Entry.first) << ":"
243              << hexValue(Entry.first + Entry.second.getSize())
244              << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
245   });
246 }
247 
loadGenericTargetInfo(StringRef TheTriple,StringRef TheFeatures)248 Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple,
249                                             StringRef TheFeatures) {
250   std::string TargetLookupError;
251   const Target *TheTarget =
252       TargetRegistry::lookupTarget(std::string(TheTriple), TargetLookupError);
253   if (!TheTarget)
254     return createStringError(errc::invalid_argument, TargetLookupError.c_str());
255 
256   // Register information.
257   MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple);
258   if (!RegisterInfo)
259     return createStringError(errc::invalid_argument,
260                              "no register info for target " + TheTriple);
261   MRI.reset(RegisterInfo);
262 
263   // Assembler properties and features.
264   MCTargetOptions MCOptions;
265   MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions));
266   if (!AsmInfo)
267     return createStringError(errc::invalid_argument,
268                              "no assembly info for target " + TheTriple);
269   MAI.reset(AsmInfo);
270 
271   // Target subtargets.
272   StringRef CPU;
273   MCSubtargetInfo *SubtargetInfo(
274       TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures));
275   if (!SubtargetInfo)
276     return createStringError(errc::invalid_argument,
277                              "no subtarget info for target " + TheTriple);
278   STI.reset(SubtargetInfo);
279 
280   // Instructions Info.
281   MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo());
282   if (!InstructionInfo)
283     return createStringError(errc::invalid_argument,
284                              "no instruction info for target " + TheTriple);
285   MII.reset(InstructionInfo);
286 
287   MC = std::make_unique<MCContext>(Triple(TheTriple), MAI.get(), MRI.get(),
288                                    STI.get());
289 
290   // Assembler.
291   MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC));
292   if (!DisAsm)
293     return createStringError(errc::invalid_argument,
294                              "no disassembler for target " + TheTriple);
295   MD.reset(DisAsm);
296 
297   MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter(
298       Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI));
299   if (!InstructionPrinter)
300     return createStringError(errc::invalid_argument,
301                              "no target assembly language printer for target " +
302                                  TheTriple);
303   MIP.reset(InstructionPrinter);
304   InstructionPrinter->setPrintImmHex(true);
305 
306   return Error::success();
307 }
308 
309 Expected<std::pair<uint64_t, object::SectionRef>>
getSection(LVScope * Scope,LVAddress Address,LVSectionIndex SectionIndex)310 LVBinaryReader::getSection(LVScope *Scope, LVAddress Address,
311                            LVSectionIndex SectionIndex) {
312   // Return the 'text' section with the code for this logical scope.
313   // COFF: SectionIndex is zero. Use 'SectionAddresses' data.
314   // ELF: SectionIndex is the section index in the file.
315   if (SectionIndex) {
316     LVSections::iterator Iter = Sections.find(SectionIndex);
317     if (Iter == Sections.end()) {
318       return createStringError(errc::invalid_argument,
319                                "invalid section index for: '%s'",
320                                Scope->getName().str().c_str());
321     }
322     const object::SectionRef Section = Iter->second;
323     return std::make_pair(Section.getAddress(), Section);
324   }
325 
326   // Ensure a valid starting address for the public names.
327   LVSectionAddresses::const_iterator Iter =
328       SectionAddresses.upper_bound(Address);
329   if (Iter == SectionAddresses.begin())
330     return createStringError(errc::invalid_argument,
331                              "invalid section address for: '%s'",
332                              Scope->getName().str().c_str());
333 
334   // Get section that contains the code for this function.
335   Iter = SectionAddresses.lower_bound(Address);
336   if (Iter != SectionAddresses.begin())
337     --Iter;
338   return std::make_pair(Iter->first, Iter->second);
339 }
340 
addSectionRange(LVSectionIndex SectionIndex,LVScope * Scope)341 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex,
342                                      LVScope *Scope) {
343   LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
344   ScopesWithRanges->addEntry(Scope);
345 }
346 
addSectionRange(LVSectionIndex SectionIndex,LVScope * Scope,LVAddress LowerAddress,LVAddress UpperAddress)347 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex,
348                                      LVScope *Scope, LVAddress LowerAddress,
349                                      LVAddress UpperAddress) {
350   LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
351   ScopesWithRanges->addEntry(Scope, LowerAddress, UpperAddress);
352 }
353 
getSectionRanges(LVSectionIndex SectionIndex)354 LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) {
355   // Check if we already have a mapping for this section index.
356   LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex);
357   if (IterSection == SectionRanges.end())
358     IterSection =
359         SectionRanges.emplace(SectionIndex, std::make_unique<LVRange>()).first;
360   LVRange *Range = IterSection->second.get();
361   assert(Range && "Range is null.");
362   return Range;
363 }
364 
createInstructions(LVScope * Scope,LVSectionIndex SectionIndex,const LVNameInfo & NameInfo)365 Error LVBinaryReader::createInstructions(LVScope *Scope,
366                                          LVSectionIndex SectionIndex,
367                                          const LVNameInfo &NameInfo) {
368   assert(Scope && "Scope is null.");
369 
370   // Skip stripped functions.
371   if (Scope->getIsDiscarded())
372     return Error::success();
373 
374   // Find associated address and size for the given function entry point.
375   LVAddress Address = NameInfo.first;
376   uint64_t Size = NameInfo.second;
377 
378   LLVM_DEBUG({
379     dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '"
380            << Scope->getLinkageName() << "'\n"
381            << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
382            << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n";
383   });
384 
385   Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr =
386       getSection(Scope, Address, SectionIndex);
387   if (!SectionOrErr)
388     return SectionOrErr.takeError();
389   const object::SectionRef Section = (*SectionOrErr).second;
390   uint64_t SectionAddress = (*SectionOrErr).first;
391 
392   Expected<StringRef> SectionContentsOrErr = Section.getContents();
393   if (!SectionContentsOrErr)
394     return SectionOrErr.takeError();
395 
396   // There are cases where the section size is smaller than the [LowPC,HighPC]
397   // range; it causes us to decode invalid addresses. The recorded size in the
398   // logical scope is one less than the real size.
399   LLVM_DEBUG({
400     dbgs() << " Size: " << hexValue(Size)
401            << ", Section Size: " << hexValue(Section.getSize()) << "\n";
402   });
403   Size = std::min(Size + 1, Section.getSize());
404 
405   ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr);
406   uint64_t Offset = Address - SectionAddress;
407   uint8_t const *Begin = Bytes.data() + Offset;
408   uint8_t const *End = Bytes.data() + Offset + Size;
409 
410   LLVM_DEBUG({
411     Expected<StringRef> SectionNameOrErr = Section.getName();
412     if (!SectionNameOrErr)
413       consumeError(SectionNameOrErr.takeError());
414     else
415       dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " ["
416              << hexValue((uint64_t)Section.getAddress()) << ":"
417              << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10)
418              << "] Name: '" << *SectionNameOrErr << "'\n"
419              << "Begin: " << hexValue((uint64_t)Begin)
420              << ", End: " << hexValue((uint64_t)End) << "\n";
421   });
422 
423   // Address for first instruction line.
424   LVAddress FirstAddress = Address;
425   auto InstructionsSP = std::make_unique<LVLines>();
426   LVLines &Instructions = *InstructionsSP;
427   DiscoveredLines.emplace_back(std::move(InstructionsSP));
428 
429   while (Begin < End) {
430     MCInst Instruction;
431     uint64_t BytesConsumed = 0;
432     SmallVector<char, 64> InsnStr;
433     raw_svector_ostream Annotations(InsnStr);
434     MCDisassembler::DecodeStatus const S =
435         MD->getInstruction(Instruction, BytesConsumed,
436                            ArrayRef<uint8_t>(Begin, End), Address, outs());
437     switch (S) {
438     case MCDisassembler::Fail:
439       LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; });
440       if (BytesConsumed == 0)
441         // Skip invalid bytes
442         BytesConsumed = 1;
443       break;
444     case MCDisassembler::SoftFail:
445       LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; });
446       [[fallthrough]];
447     case MCDisassembler::Success: {
448       std::string Buffer;
449       raw_string_ostream Stream(Buffer);
450       StringRef AnnotationsStr = Annotations.str();
451       MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream);
452       LLVM_DEBUG({
453         std::string BufferCodes;
454         raw_string_ostream StreamCodes(BufferCodes);
455         StreamCodes << format_bytes(
456             ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16,
457             16);
458         dbgs() << "[" << hexValue((uint64_t)Begin) << "] "
459                << "Size: " << format_decimal(BytesConsumed, 2) << " ("
460                << formatv("{0}",
461                           fmt_align(StreamCodes.str(), AlignStyle::Left, 32))
462                << ") " << hexValue((uint64_t)Address) << ": " << Stream.str()
463                << "\n";
464       });
465       // Here we add logical lines to the Instructions. Later on,
466       // the 'processLines()' function will move each created logical line
467       // to its enclosing logical scope, using the debug ranges information
468       // and they will be released when its scope parent is deleted.
469       LVLineAssembler *Line = createLineAssembler();
470       Line->setAddress(Address);
471       Line->setName(StringRef(Stream.str()).trim());
472       Instructions.push_back(Line);
473       break;
474     }
475     }
476     Address += BytesConsumed;
477     Begin += BytesConsumed;
478   }
479 
480   LLVM_DEBUG({
481     size_t Index = 0;
482     dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
483            << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
484            << "Address: " << hexValue(FirstAddress)
485            << format(" - Collected instructions lines: %d\n",
486                      Instructions.size());
487     for (const LVLine *Line : Instructions)
488       dbgs() << format_decimal(++Index, 5) << ": "
489              << hexValue(Line->getOffset()) << ", (" << Line->getName()
490              << ")\n";
491   });
492 
493   // The scope in the assembler names is linked to its own instructions.
494   ScopeInstructions.add(SectionIndex, Scope, &Instructions);
495   AssemblerMappings.add(SectionIndex, FirstAddress, Scope);
496 
497   return Error::success();
498 }
499 
createInstructions(LVScope * Function,LVSectionIndex SectionIndex)500 Error LVBinaryReader::createInstructions(LVScope *Function,
501                                          LVSectionIndex SectionIndex) {
502   if (!options().getPrintInstructions())
503     return Error::success();
504 
505   LVNameInfo Name = CompileUnit->findPublicName(Function);
506   if (Name.first != LVAddress(UINT64_MAX))
507     return createInstructions(Function, SectionIndex, Name);
508 
509   return Error::success();
510 }
511 
createInstructions()512 Error LVBinaryReader::createInstructions() {
513   if (!options().getPrintInstructions())
514     return Error::success();
515 
516   LLVM_DEBUG({
517     size_t Index = 1;
518     dbgs() << "\nPublic Names (Scope):\n";
519     for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
520       LVScope *Scope = Name.first;
521       const LVNameInfo &NameInfo = Name.second;
522       LVAddress Address = NameInfo.first;
523       uint64_t Size = NameInfo.second;
524       dbgs() << format_decimal(Index++, 5) << ": "
525              << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
526              << hexValue(Address) << ":" << hexValue(Address + Size) << "] "
527              << "Name: '" << Scope->getName() << "' / '"
528              << Scope->getLinkageName() << "'\n";
529     }
530   });
531 
532   // For each public name in the current compile unit, create the line
533   // records that represent the executable instructions.
534   for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
535     LVScope *Scope = Name.first;
536     // The symbol table extracted from the object file always contains a
537     // non-empty name (linkage name). However, the logical scope does not
538     // guarantee to have a name for the linkage name (main is one case).
539     // For those cases, set the linkage name the same as the name.
540     if (!Scope->getLinkageNameIndex())
541       Scope->setLinkageName(Scope->getName());
542     LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName());
543     if (Error Err = createInstructions(Scope, SectionIndex, Name.second))
544       return Err;
545   }
546 
547   return Error::success();
548 }
549 
550 // During the traversal of the debug information sections, we created the
551 // logical lines representing the disassembled instructions from the text
552 // section and the logical lines representing the line records from the
553 // debug line section. Using the ranges associated with the logical scopes,
554 // we will allocate those logical lines to their logical scopes.
processLines(LVLines * DebugLines,LVSectionIndex SectionIndex,LVScope * Function)555 void LVBinaryReader::processLines(LVLines *DebugLines,
556                                   LVSectionIndex SectionIndex,
557                                   LVScope *Function) {
558   assert(DebugLines && "DebugLines is null.");
559 
560   // Just return if this compilation unit does not have any line records
561   // and no instruction lines were created.
562   if (DebugLines->empty() && !options().getPrintInstructions())
563     return;
564 
565   // Merge the debug lines and instruction lines using their text address;
566   // the logical line representing the debug line record is followed by the
567   // line(s) representing the disassembled instructions, whose addresses are
568   // equal or greater that the line address and less than the address of the
569   // next debug line record.
570   LLVM_DEBUG({
571     size_t Index = 1;
572     size_t PerLine = 4;
573     dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size());
574     for (const LVLine *Line : *DebugLines) {
575       dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset())
576              << ", (" << Line->getLineNumber() << ")"
577              << ((Index % PerLine) ? "  " : "\n");
578       ++Index;
579     }
580     dbgs() << ((Index % PerLine) ? "\n" : "");
581   });
582 
583   bool TraverseLines = true;
584   LVLines::iterator Iter = DebugLines->begin();
585   while (TraverseLines && Iter != DebugLines->end()) {
586     uint64_t DebugAddress = (*Iter)->getAddress();
587 
588     // Get the function with an entry point that matches this line and
589     // its associated assembler entries. In the case of COMDAT, the input
590     // 'Function' is not null. Use it to find its address ranges.
591     LVScope *Scope = Function;
592     if (!Function) {
593       Scope = AssemblerMappings.find(SectionIndex, DebugAddress);
594       if (!Scope) {
595         ++Iter;
596         continue;
597       }
598     }
599 
600     // Get the associated instructions for the found 'Scope'.
601     LVLines InstructionLines;
602     LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope);
603     if (Lines)
604       InstructionLines = std::move(*Lines);
605 
606     LLVM_DEBUG({
607       size_t Index = 0;
608       dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
609              << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
610              << format("Process instruction lines: %d\n",
611                        InstructionLines.size());
612       for (const LVLine *Line : InstructionLines)
613         dbgs() << format_decimal(++Index, 5) << ": "
614                << hexValue(Line->getOffset()) << ", (" << Line->getName()
615                << ")\n";
616     });
617 
618     // Continue with next debug line if there are not instructions lines.
619     if (InstructionLines.empty()) {
620       ++Iter;
621       continue;
622     }
623 
624     for (LVLine *InstructionLine : InstructionLines) {
625       uint64_t InstructionAddress = InstructionLine->getAddress();
626       LLVM_DEBUG({
627         dbgs() << "Instruction address: " << hexValue(InstructionAddress)
628                << "\n";
629       });
630       if (TraverseLines) {
631         while (Iter != DebugLines->end()) {
632           DebugAddress = (*Iter)->getAddress();
633           LLVM_DEBUG({
634             bool IsDebug = (*Iter)->getIsLineDebug();
635             dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " ["
636                    << hexValue(DebugAddress) << "]";
637             if (IsDebug)
638               dbgs() << format(" %d", (*Iter)->getLineNumber());
639             dbgs() << "\n";
640           });
641           // Instruction address before debug line.
642           if (InstructionAddress < DebugAddress) {
643             LLVM_DEBUG({
644               dbgs() << "Inserted instruction address: "
645                      << hexValue(InstructionAddress) << " before line: "
646                      << format("%d", (*Iter)->getLineNumber()) << " ["
647                      << hexValue(DebugAddress) << "]\n";
648             });
649             Iter = DebugLines->insert(Iter, InstructionLine);
650             // The returned iterator points to the inserted instruction.
651             // Skip it and point to the line acting as reference.
652             ++Iter;
653             break;
654           }
655           ++Iter;
656         }
657         if (Iter == DebugLines->end()) {
658           // We have reached the end of the source lines and the current
659           // instruction line address is greater than the last source line.
660           TraverseLines = false;
661           DebugLines->push_back(InstructionLine);
662         }
663       } else {
664         DebugLines->push_back(InstructionLine);
665       }
666     }
667   }
668 
669   LLVM_DEBUG({
670     dbgs() << format("Lines after merge: %d\n", DebugLines->size());
671     size_t Index = 0;
672     for (const LVLine *Line : *DebugLines) {
673       dbgs() << format_decimal(++Index, 5) << ": "
674              << hexValue(Line->getOffset()) << ", ("
675              << ((Line->getIsLineDebug())
676                      ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
677                      : Line->getName())
678              << ")\n";
679     }
680   });
681 
682   // If this compilation unit does not have line records, traverse its scopes
683   // and take any collected instruction lines as the working set in order
684   // to move them to their associated scope.
685   if (DebugLines->empty()) {
686     if (const LVScopes *Scopes = CompileUnit->getScopes())
687       for (LVScope *Scope : *Scopes) {
688         LVLines *Lines = ScopeInstructions.find(Scope);
689         if (Lines) {
690 
691           LLVM_DEBUG({
692             size_t Index = 0;
693             dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
694                    << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
695                    << format("Instruction lines: %d\n", Lines->size());
696             for (const LVLine *Line : *Lines)
697               dbgs() << format_decimal(++Index, 5) << ": "
698                      << hexValue(Line->getOffset()) << ", (" << Line->getName()
699                      << ")\n";
700           });
701 
702           if (Scope->getIsArtificial()) {
703             // Add the instruction lines to their artificial scope.
704             for (LVLine *Line : *Lines)
705               Scope->addElement(Line);
706           } else {
707             DebugLines->append(*Lines);
708           }
709           Lines->clear();
710         }
711       }
712   }
713 
714   LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
715   ScopesWithRanges->startSearch();
716 
717   // Process collected lines.
718   LVScope *Scope;
719   for (LVLine *Line : *DebugLines) {
720     // Using the current line address, get its associated lexical scope and
721     // add the line information to it.
722     Scope = ScopesWithRanges->getEntry(Line->getAddress());
723     if (!Scope) {
724       // If missing scope, use the compile unit.
725       Scope = CompileUnit;
726       LLVM_DEBUG({
727         dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", ("
728                << ((Line->getIsLineDebug())
729                        ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
730                        : Line->getName())
731                << ")\n";
732       });
733     }
734 
735     // Add line object to scope.
736     Scope->addElement(Line);
737 
738     // Report any line zero.
739     if (options().getWarningLines() && Line->getIsLineDebug() &&
740         !Line->getLineNumber())
741       CompileUnit->addLineZero(Line);
742 
743     // Some compilers generate ranges in the compile unit; other compilers
744     // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global
745     // variables, we need to generate the map ranges for the compile unit.
746     // If we use the ranges stored at the scope level, there are cases where
747     // the address referenced by a symbol location, is not in the enclosing
748     // scope, but in an outer one. By using the ranges stored in the compile
749     // unit, we can catch all those addresses.
750     if (Line->getIsLineDebug())
751       CompileUnit->addMapping(Line, SectionIndex);
752 
753     // Resolve any given pattern.
754     patterns().resolvePatternMatch(Line);
755   }
756 
757   ScopesWithRanges->endSearch();
758 }
759 
processLines(LVLines * DebugLines,LVSectionIndex SectionIndex)760 void LVBinaryReader::processLines(LVLines *DebugLines,
761                                   LVSectionIndex SectionIndex) {
762   assert(DebugLines && "DebugLines is null.");
763   if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex))
764     return;
765 
766   // If the Compile Unit does not contain comdat functions, use the whole
767   // set of debug lines, as the addresses don't have conflicts.
768   if (!CompileUnit->getHasComdatScopes()) {
769     processLines(DebugLines, SectionIndex, nullptr);
770     return;
771   }
772 
773   // Find the indexes for the lines whose address is zero.
774   std::vector<size_t> AddressZero;
775   LVLines::iterator It =
776       std::find_if(std::begin(*DebugLines), std::end(*DebugLines),
777                    [](LVLine *Line) { return !Line->getAddress(); });
778   while (It != std::end(*DebugLines)) {
779     AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It));
780     It = std::find_if(std::next(It), std::end(*DebugLines),
781                       [](LVLine *Line) { return !Line->getAddress(); });
782   }
783 
784   // If the set of debug lines does not contain any line with address zero,
785   // use the whole set. It means we are dealing with an initialization
786   // section from a fully linked binary.
787   if (AddressZero.empty()) {
788     processLines(DebugLines, SectionIndex, nullptr);
789     return;
790   }
791 
792   // The Compile unit contains comdat functions. Traverse the collected
793   // debug lines and identify logical groups based on their start and
794   // address. Each group starts with a zero address.
795   // Begin, End, Address, IsDone.
796   using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>;
797   std::vector<LVBucket> Buckets;
798 
799   LVAddress Address;
800   size_t Begin = 0;
801   size_t End = 0;
802   size_t Index = 0;
803   for (Index = 0; Index < AddressZero.size() - 1; ++Index) {
804     Begin = AddressZero[Index];
805     End = AddressZero[Index + 1] - 1;
806     Address = (*DebugLines)[End]->getAddress();
807     Buckets.emplace_back(Begin, End, Address, false);
808   }
809 
810   // Add the last bucket.
811   if (Index) {
812     Begin = AddressZero[Index];
813     End = DebugLines->size() - 1;
814     Address = (*DebugLines)[End]->getAddress();
815     Buckets.emplace_back(Begin, End, Address, false);
816   }
817 
818   LLVM_DEBUG({
819     dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n";
820     for (LVBucket &Bucket : Buckets) {
821       dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", "
822              << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", "
823              << "Address: " << hexValue(std::get<2>(Bucket)) << "\n";
824     }
825   });
826 
827   // Traverse the sections and buckets looking for matches on the section
828   // sizes. In the unlikely event of different buckets with the same size
829   // process them in order and mark them as done.
830   LVLines Group;
831   for (LVSections::reference Entry : Sections) {
832     LVSectionIndex SectionIndex = Entry.first;
833     const object::SectionRef Section = Entry.second;
834     uint64_t Size = Section.getSize();
835     LLVM_DEBUG({
836       dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3)
837              << " , Section Size: " << hexValue(Section.getSize())
838              << " , Section Address: " << hexValue(Section.getAddress())
839              << "\n";
840     });
841 
842     for (LVBucket &Bucket : Buckets) {
843       if (std::get<3>(Bucket))
844         // Already done for previous section.
845         continue;
846       if (Size == std::get<2>(Bucket)) {
847         // We have a match on the section size.
848         Group.clear();
849         LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket);
850         LVLines::iterator IterEnd =
851             DebugLines->begin() + std::get<1>(Bucket) + 1;
852         for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter)
853           Group.push_back(*Iter);
854         processLines(&Group, SectionIndex, /*Function=*/nullptr);
855         std::get<3>(Bucket) = true;
856         break;
857       }
858     }
859   }
860 }
861 
862 // Traverse the scopes for the given 'Function' looking for any inlined
863 // scopes with inlined lines, which are found in 'CUInlineeLines'.
includeInlineeLines(LVSectionIndex SectionIndex,LVScope * Function)864 void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex,
865                                          LVScope *Function) {
866   SmallVector<LVInlineeLine::iterator> InlineeIters;
867   std::function<void(LVScope * Parent)> FindInlinedScopes =
868       [&](LVScope *Parent) {
869         if (const LVScopes *Scopes = Parent->getScopes())
870           for (LVScope *Scope : *Scopes) {
871             LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope);
872             if (Iter != CUInlineeLines.end())
873               InlineeIters.push_back(Iter);
874             FindInlinedScopes(Scope);
875           }
876       };
877 
878   // Find all inlined scopes for the given 'Function'.
879   FindInlinedScopes(Function);
880   for (LVInlineeLine::iterator InlineeIter : InlineeIters) {
881     LVScope *Scope = InlineeIter->first;
882     addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex);
883 
884     // TODO: Convert this into a reference.
885     LVLines *InlineeLines = InlineeIter->second.get();
886     LLVM_DEBUG({
887       dbgs() << "Inlined lines for: " << Scope->getName() << "\n";
888       for (const LVLine *Line : *InlineeLines)
889         dbgs() << "[" << hexValue(Line->getAddress()) << "] "
890                << Line->getLineNumber() << "\n";
891       dbgs() << format("Debug lines: %d\n", CULines.size());
892       for (const LVLine *Line : CULines)
893         dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
894                << Line->getLineNumber() << ")\n";
895       ;
896     });
897 
898     // The inlined lines must be merged using its address, in order to keep
899     // the real order of the instructions. The inlined lines are mixed with
900     // the other non-inlined lines.
901     if (InlineeLines->size()) {
902       // First address of inlinee code.
903       uint64_t InlineeStart = (InlineeLines->front())->getAddress();
904       LVLines::iterator Iter = std::find_if(
905           CULines.begin(), CULines.end(), [&](LVLine *Item) -> bool {
906             return Item->getAddress() == InlineeStart;
907           });
908       if (Iter != CULines.end()) {
909         // 'Iter' points to the line where the inlined function is called.
910         // Emulate the DW_AT_call_line attribute.
911         Scope->setCallLineNumber((*Iter)->getLineNumber());
912         // Mark the referenced line as the start of the inlined function.
913         // Skip the first line during the insertion, as the address and
914         // line number as the same. Otherwise we have to erase and insert.
915         (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber());
916         ++Iter;
917         CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end());
918       }
919     }
920 
921     // Remove this set of lines from the container; each inlined function
922     // creates an unique set of lines. Remove only the created container.
923     CUInlineeLines.erase(InlineeIter);
924     InlineeLines->clear();
925   }
926   LLVM_DEBUG({
927     dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n";
928     dbgs() << format("Debug lines: %d\n", CULines.size());
929     for (const LVLine *Line : CULines)
930       dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
931              << Line->getLineNumber() << ")\n";
932     ;
933   });
934 }
935 
print(raw_ostream & OS) const936 void LVBinaryReader::print(raw_ostream &OS) const {
937   OS << "LVBinaryReader\n";
938   LLVM_DEBUG(dbgs() << "PrintReader\n");
939 }
940