15ffd83dbSDimitry Andric //===-- CompileUnitIndex.cpp ----------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "CompileUnitIndex.h"
100b57cec5SDimitry Andric 
110b57cec5SDimitry Andric #include "PdbIndex.h"
120b57cec5SDimitry Andric #include "PdbUtil.h"
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
150b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
160b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
170b57cec5SDimitry Andric #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
180b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
190b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
200b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
210b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
220b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
230b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
240b57cec5SDimitry Andric #include "llvm/Support/Path.h"
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric #include "lldb/Utility/LLDBAssert.h"
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric using namespace lldb;
290b57cec5SDimitry Andric using namespace lldb_private;
300b57cec5SDimitry Andric using namespace lldb_private::npdb;
310b57cec5SDimitry Andric using namespace llvm::codeview;
320b57cec5SDimitry Andric using namespace llvm::pdb;
330b57cec5SDimitry Andric 
IsMainFile(llvm::StringRef main,llvm::StringRef other)340b57cec5SDimitry Andric static bool IsMainFile(llvm::StringRef main, llvm::StringRef other) {
350b57cec5SDimitry Andric   if (main == other)
360b57cec5SDimitry Andric     return true;
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric   // If the files refer to the local file system, we can just ask the file
390b57cec5SDimitry Andric   // system if they're equivalent.  But if the source isn't present on disk
400b57cec5SDimitry Andric   // then we still want to try.
410b57cec5SDimitry Andric   if (llvm::sys::fs::equivalent(main, other))
420b57cec5SDimitry Andric     return true;
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric   llvm::SmallString<64> normalized(other);
450b57cec5SDimitry Andric   llvm::sys::path::native(normalized);
46fe6060f1SDimitry Andric   return main.equals_insensitive(normalized);
470b57cec5SDimitry Andric }
480b57cec5SDimitry Andric 
ParseCompile3(const CVSymbol & sym,CompilandIndexItem & cci)490b57cec5SDimitry Andric static void ParseCompile3(const CVSymbol &sym, CompilandIndexItem &cci) {
500b57cec5SDimitry Andric   cci.m_compile_opts.emplace();
510b57cec5SDimitry Andric   llvm::cantFail(
520b57cec5SDimitry Andric       SymbolDeserializer::deserializeAs<Compile3Sym>(sym, *cci.m_compile_opts));
530b57cec5SDimitry Andric }
540b57cec5SDimitry Andric 
ParseObjname(const CVSymbol & sym,CompilandIndexItem & cci)550b57cec5SDimitry Andric static void ParseObjname(const CVSymbol &sym, CompilandIndexItem &cci) {
560b57cec5SDimitry Andric   cci.m_obj_name.emplace();
570b57cec5SDimitry Andric   llvm::cantFail(
580b57cec5SDimitry Andric       SymbolDeserializer::deserializeAs<ObjNameSym>(sym, *cci.m_obj_name));
590b57cec5SDimitry Andric }
600b57cec5SDimitry Andric 
ParseBuildInfo(PdbIndex & index,const CVSymbol & sym,CompilandIndexItem & cci)610b57cec5SDimitry Andric static void ParseBuildInfo(PdbIndex &index, const CVSymbol &sym,
620b57cec5SDimitry Andric                            CompilandIndexItem &cci) {
630b57cec5SDimitry Andric   BuildInfoSym bis(SymbolRecordKind::BuildInfoSym);
640b57cec5SDimitry Andric   llvm::cantFail(SymbolDeserializer::deserializeAs<BuildInfoSym>(sym, bis));
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric   // S_BUILDINFO just points to an LF_BUILDINFO in the IPI stream.  Let's do
670b57cec5SDimitry Andric   // a little extra work to pull out the LF_BUILDINFO.
680b57cec5SDimitry Andric   LazyRandomTypeCollection &types = index.ipi().typeCollection();
69bdd1243dSDimitry Andric   std::optional<CVType> cvt = types.tryGetType(bis.BuildId);
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric   if (!cvt || cvt->kind() != LF_BUILDINFO)
720b57cec5SDimitry Andric     return;
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric   BuildInfoRecord bir;
750b57cec5SDimitry Andric   llvm::cantFail(TypeDeserializer::deserializeAs<BuildInfoRecord>(*cvt, bir));
760b57cec5SDimitry Andric   cci.m_build_info.assign(bir.ArgIndices.begin(), bir.ArgIndices.end());
770b57cec5SDimitry Andric }
780b57cec5SDimitry Andric 
ParseExtendedInfo(PdbIndex & index,CompilandIndexItem & item)790b57cec5SDimitry Andric static void ParseExtendedInfo(PdbIndex &index, CompilandIndexItem &item) {
800b57cec5SDimitry Andric   const CVSymbolArray &syms = item.m_debug_stream.getSymbolArray();
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric   // This is a private function, it shouldn't be called if the information
830b57cec5SDimitry Andric   // has already been parsed.
840b57cec5SDimitry Andric   lldbassert(!item.m_obj_name);
850b57cec5SDimitry Andric   lldbassert(!item.m_compile_opts);
860b57cec5SDimitry Andric   lldbassert(item.m_build_info.empty());
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric   // We're looking for 3 things.  S_COMPILE3, S_OBJNAME, and S_BUILDINFO.
890b57cec5SDimitry Andric   int found = 0;
900b57cec5SDimitry Andric   for (const CVSymbol &sym : syms) {
910b57cec5SDimitry Andric     switch (sym.kind()) {
920b57cec5SDimitry Andric     case S_COMPILE3:
930b57cec5SDimitry Andric       ParseCompile3(sym, item);
940b57cec5SDimitry Andric       break;
950b57cec5SDimitry Andric     case S_OBJNAME:
960b57cec5SDimitry Andric       ParseObjname(sym, item);
970b57cec5SDimitry Andric       break;
980b57cec5SDimitry Andric     case S_BUILDINFO:
990b57cec5SDimitry Andric       ParseBuildInfo(index, sym, item);
1000b57cec5SDimitry Andric       break;
1010b57cec5SDimitry Andric     default:
1020b57cec5SDimitry Andric       continue;
1030b57cec5SDimitry Andric     }
1040b57cec5SDimitry Andric     if (++found >= 3)
1050b57cec5SDimitry Andric       break;
1060b57cec5SDimitry Andric   }
1070b57cec5SDimitry Andric }
1080b57cec5SDimitry Andric 
ParseInlineeLineTableForCompileUnit(CompilandIndexItem & item)10904eeddc0SDimitry Andric static void ParseInlineeLineTableForCompileUnit(CompilandIndexItem &item) {
11004eeddc0SDimitry Andric   for (const auto &ss : item.m_debug_stream.getSubsectionsArray()) {
11104eeddc0SDimitry Andric     if (ss.kind() != DebugSubsectionKind::InlineeLines)
11204eeddc0SDimitry Andric       continue;
11304eeddc0SDimitry Andric 
11404eeddc0SDimitry Andric     DebugInlineeLinesSubsectionRef inlinee_lines;
11504eeddc0SDimitry Andric     llvm::BinaryStreamReader reader(ss.getRecordData());
11604eeddc0SDimitry Andric     if (llvm::Error error = inlinee_lines.initialize(reader)) {
11704eeddc0SDimitry Andric       consumeError(std::move(error));
11804eeddc0SDimitry Andric       continue;
11904eeddc0SDimitry Andric     }
12004eeddc0SDimitry Andric 
12104eeddc0SDimitry Andric     for (const InlineeSourceLine &Line : inlinee_lines) {
12204eeddc0SDimitry Andric       item.m_inline_map[Line.Header->Inlinee] = Line;
12304eeddc0SDimitry Andric     }
12404eeddc0SDimitry Andric   }
12504eeddc0SDimitry Andric }
12604eeddc0SDimitry Andric 
CompilandIndexItem(PdbCompilandId id,llvm::pdb::ModuleDebugStreamRef debug_stream,llvm::pdb::DbiModuleDescriptor descriptor)1270b57cec5SDimitry Andric CompilandIndexItem::CompilandIndexItem(
1280b57cec5SDimitry Andric     PdbCompilandId id, llvm::pdb::ModuleDebugStreamRef debug_stream,
1290b57cec5SDimitry Andric     llvm::pdb::DbiModuleDescriptor descriptor)
1300b57cec5SDimitry Andric     : m_id(id), m_debug_stream(std::move(debug_stream)),
1310b57cec5SDimitry Andric       m_module_descriptor(std::move(descriptor)) {}
1320b57cec5SDimitry Andric 
GetOrCreateCompiland(uint16_t modi)1330b57cec5SDimitry Andric CompilandIndexItem &CompileUnitIndex::GetOrCreateCompiland(uint16_t modi) {
1340b57cec5SDimitry Andric   auto result = m_comp_units.try_emplace(modi, nullptr);
1350b57cec5SDimitry Andric   if (!result.second)
1360b57cec5SDimitry Andric     return *result.first->second;
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   // Find the module list and load its debug information stream and cache it
1390b57cec5SDimitry Andric   // since we need to use it for almost all interesting operations.
1400b57cec5SDimitry Andric   const DbiModuleList &modules = m_index.dbi().modules();
1410b57cec5SDimitry Andric   llvm::pdb::DbiModuleDescriptor descriptor = modules.getModuleDescriptor(modi);
1420b57cec5SDimitry Andric   uint16_t stream = descriptor.getModuleStreamIndex();
1430b57cec5SDimitry Andric   std::unique_ptr<llvm::msf::MappedBlockStream> stream_data =
1440b57cec5SDimitry Andric       m_index.pdb().createIndexedStream(stream);
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric 
1470b57cec5SDimitry Andric   std::unique_ptr<CompilandIndexItem>& cci = result.first->second;
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   if (!stream_data) {
1500b57cec5SDimitry Andric     llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor, nullptr);
1519dba64beSDimitry Andric     cci = std::make_unique<CompilandIndexItem>(PdbCompilandId{ modi }, debug_stream, std::move(descriptor));
1520b57cec5SDimitry Andric     return *cci;
1530b57cec5SDimitry Andric   }
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric   llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor,
1560b57cec5SDimitry Andric                                                std::move(stream_data));
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric   cantFail(debug_stream.reload());
1590b57cec5SDimitry Andric 
1609dba64beSDimitry Andric   cci = std::make_unique<CompilandIndexItem>(
1610b57cec5SDimitry Andric       PdbCompilandId{modi}, std::move(debug_stream), std::move(descriptor));
1620b57cec5SDimitry Andric   ParseExtendedInfo(m_index, *cci);
16304eeddc0SDimitry Andric   ParseInlineeLineTableForCompileUnit(*cci);
1640b57cec5SDimitry Andric 
16506c3fb27SDimitry Andric   auto strings = m_index.pdb().getStringTable();
16606c3fb27SDimitry Andric   if (strings) {
16781ad6265SDimitry Andric     cci->m_strings.initialize(cci->m_debug_stream.getSubsectionsArray());
16806c3fb27SDimitry Andric     cci->m_strings.setStrings(strings->getStringTable());
16906c3fb27SDimitry Andric   } else {
17006c3fb27SDimitry Andric     consumeError(strings.takeError());
17106c3fb27SDimitry Andric   }
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric   // We want the main source file to always comes first.  Note that we can't
1740b57cec5SDimitry Andric   // just push_back the main file onto the front because `GetMainSourceFile`
1750b57cec5SDimitry Andric   // computes it in such a way that it doesn't own the resulting memory.  So we
1760b57cec5SDimitry Andric   // have to iterate the module file list comparing each one to the main file
1770b57cec5SDimitry Andric   // name until we find it, and we can cache that one since the memory is backed
1780b57cec5SDimitry Andric   // by a contiguous chunk inside the mapped PDB.
1790b57cec5SDimitry Andric   llvm::SmallString<64> main_file = GetMainSourceFile(*cci);
1805ffd83dbSDimitry Andric   std::string s = std::string(main_file.str());
1810b57cec5SDimitry Andric   llvm::sys::path::native(main_file);
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric   uint32_t file_count = modules.getSourceFileCount(modi);
1840b57cec5SDimitry Andric   cci->m_file_list.reserve(file_count);
1850b57cec5SDimitry Andric   bool found_main_file = false;
1860b57cec5SDimitry Andric   for (llvm::StringRef file : modules.source_files(modi)) {
1870b57cec5SDimitry Andric     if (!found_main_file && IsMainFile(main_file, file)) {
1880b57cec5SDimitry Andric       cci->m_file_list.insert(cci->m_file_list.begin(), file);
1890b57cec5SDimitry Andric       found_main_file = true;
1900b57cec5SDimitry Andric       continue;
1910b57cec5SDimitry Andric     }
1920b57cec5SDimitry Andric     cci->m_file_list.push_back(file);
1930b57cec5SDimitry Andric   }
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric   return *cci;
1960b57cec5SDimitry Andric }
1970b57cec5SDimitry Andric 
GetCompiland(uint16_t modi) const1980b57cec5SDimitry Andric const CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) const {
1990b57cec5SDimitry Andric   auto iter = m_comp_units.find(modi);
2000b57cec5SDimitry Andric   if (iter == m_comp_units.end())
2010b57cec5SDimitry Andric     return nullptr;
2020b57cec5SDimitry Andric   return iter->second.get();
2030b57cec5SDimitry Andric }
2040b57cec5SDimitry Andric 
GetCompiland(uint16_t modi)2050b57cec5SDimitry Andric CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) {
2060b57cec5SDimitry Andric   auto iter = m_comp_units.find(modi);
2070b57cec5SDimitry Andric   if (iter == m_comp_units.end())
2080b57cec5SDimitry Andric     return nullptr;
2090b57cec5SDimitry Andric   return iter->second.get();
2100b57cec5SDimitry Andric }
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric llvm::SmallString<64>
GetMainSourceFile(const CompilandIndexItem & item) const2130b57cec5SDimitry Andric CompileUnitIndex::GetMainSourceFile(const CompilandIndexItem &item) const {
2140b57cec5SDimitry Andric   // LF_BUILDINFO contains a list of arg indices which point to LF_STRING_ID
2150b57cec5SDimitry Andric   // records in the IPI stream.  The order of the arg indices is as follows:
2160b57cec5SDimitry Andric   // [0] - working directory where compiler was invoked.
2170b57cec5SDimitry Andric   // [1] - absolute path to compiler binary
2180b57cec5SDimitry Andric   // [2] - source file name
2190b57cec5SDimitry Andric   // [3] - path to compiler generated PDB (the /Zi PDB, although this entry gets
2200b57cec5SDimitry Andric   //       added even when using /Z7)
2210b57cec5SDimitry Andric   // [4] - full command line invocation.
2220b57cec5SDimitry Andric   //
2230b57cec5SDimitry Andric   // We need to form the path [0]\[2] to generate the full path to the main
2240b57cec5SDimitry Andric   // file.source
2250b57cec5SDimitry Andric   if (item.m_build_info.size() < 3)
2260b57cec5SDimitry Andric     return {""};
2270b57cec5SDimitry Andric 
2280b57cec5SDimitry Andric   LazyRandomTypeCollection &types = m_index.ipi().typeCollection();
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   StringIdRecord working_dir;
2310b57cec5SDimitry Andric   StringIdRecord file_name;
2320b57cec5SDimitry Andric   CVType dir_cvt = types.getType(item.m_build_info[0]);
2330b57cec5SDimitry Andric   CVType file_cvt = types.getType(item.m_build_info[2]);
2340b57cec5SDimitry Andric   llvm::cantFail(
2350b57cec5SDimitry Andric       TypeDeserializer::deserializeAs<StringIdRecord>(dir_cvt, working_dir));
2360b57cec5SDimitry Andric   llvm::cantFail(
2370b57cec5SDimitry Andric       TypeDeserializer::deserializeAs<StringIdRecord>(file_cvt, file_name));
2380b57cec5SDimitry Andric 
2395f757f3fSDimitry Andric   llvm::sys::path::Style style = working_dir.String.starts_with("/")
2400b57cec5SDimitry Andric                                      ? llvm::sys::path::Style::posix
2410b57cec5SDimitry Andric                                      : llvm::sys::path::Style::windows;
2420b57cec5SDimitry Andric   if (llvm::sys::path::is_absolute(file_name.String, style))
2430b57cec5SDimitry Andric     return file_name.String;
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric   llvm::SmallString<64> absolute_path = working_dir.String;
2460b57cec5SDimitry Andric   llvm::sys::path::append(absolute_path, file_name.String);
2470b57cec5SDimitry Andric   return absolute_path;
2480b57cec5SDimitry Andric }
249