10b57cec5SDimitry Andric //===-- CompileUnitIndex.cpp ------------------------------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "CompileUnitIndex.h" 100b57cec5SDimitry Andric 110b57cec5SDimitry Andric #include "PdbIndex.h" 120b57cec5SDimitry Andric #include "PdbUtil.h" 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" 150b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" 160b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" 170b57cec5SDimitry Andric #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 180b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" 190b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 200b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/InfoStream.h" 210b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" 220b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" 230b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 240b57cec5SDimitry Andric #include "llvm/Support/Path.h" 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric #include "lldb/Utility/LLDBAssert.h" 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric using namespace lldb; 290b57cec5SDimitry Andric using namespace lldb_private; 300b57cec5SDimitry Andric using namespace lldb_private::npdb; 310b57cec5SDimitry Andric using namespace llvm::codeview; 320b57cec5SDimitry Andric using namespace llvm::pdb; 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric static bool IsMainFile(llvm::StringRef main, llvm::StringRef other) { 350b57cec5SDimitry Andric if (main == other) 360b57cec5SDimitry Andric return true; 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric // If the files refer to the local file system, we can just ask the file 390b57cec5SDimitry Andric // system if they're equivalent. But if the source isn't present on disk 400b57cec5SDimitry Andric // then we still want to try. 410b57cec5SDimitry Andric if (llvm::sys::fs::equivalent(main, other)) 420b57cec5SDimitry Andric return true; 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric llvm::SmallString<64> normalized(other); 450b57cec5SDimitry Andric llvm::sys::path::native(normalized); 460b57cec5SDimitry Andric return main.equals_lower(normalized); 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric static void ParseCompile3(const CVSymbol &sym, CompilandIndexItem &cci) { 500b57cec5SDimitry Andric cci.m_compile_opts.emplace(); 510b57cec5SDimitry Andric llvm::cantFail( 520b57cec5SDimitry Andric SymbolDeserializer::deserializeAs<Compile3Sym>(sym, *cci.m_compile_opts)); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric static void ParseObjname(const CVSymbol &sym, CompilandIndexItem &cci) { 560b57cec5SDimitry Andric cci.m_obj_name.emplace(); 570b57cec5SDimitry Andric llvm::cantFail( 580b57cec5SDimitry Andric SymbolDeserializer::deserializeAs<ObjNameSym>(sym, *cci.m_obj_name)); 590b57cec5SDimitry Andric } 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric static void ParseBuildInfo(PdbIndex &index, const CVSymbol &sym, 620b57cec5SDimitry Andric CompilandIndexItem &cci) { 630b57cec5SDimitry Andric BuildInfoSym bis(SymbolRecordKind::BuildInfoSym); 640b57cec5SDimitry Andric llvm::cantFail(SymbolDeserializer::deserializeAs<BuildInfoSym>(sym, bis)); 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric // S_BUILDINFO just points to an LF_BUILDINFO in the IPI stream. Let's do 670b57cec5SDimitry Andric // a little extra work to pull out the LF_BUILDINFO. 680b57cec5SDimitry Andric LazyRandomTypeCollection &types = index.ipi().typeCollection(); 690b57cec5SDimitry Andric llvm::Optional<CVType> cvt = types.tryGetType(bis.BuildId); 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric if (!cvt || cvt->kind() != LF_BUILDINFO) 720b57cec5SDimitry Andric return; 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric BuildInfoRecord bir; 750b57cec5SDimitry Andric llvm::cantFail(TypeDeserializer::deserializeAs<BuildInfoRecord>(*cvt, bir)); 760b57cec5SDimitry Andric cci.m_build_info.assign(bir.ArgIndices.begin(), bir.ArgIndices.end()); 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric static void ParseExtendedInfo(PdbIndex &index, CompilandIndexItem &item) { 800b57cec5SDimitry Andric const CVSymbolArray &syms = item.m_debug_stream.getSymbolArray(); 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric // This is a private function, it shouldn't be called if the information 830b57cec5SDimitry Andric // has already been parsed. 840b57cec5SDimitry Andric lldbassert(!item.m_obj_name); 850b57cec5SDimitry Andric lldbassert(!item.m_compile_opts); 860b57cec5SDimitry Andric lldbassert(item.m_build_info.empty()); 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric // We're looking for 3 things. S_COMPILE3, S_OBJNAME, and S_BUILDINFO. 890b57cec5SDimitry Andric int found = 0; 900b57cec5SDimitry Andric for (const CVSymbol &sym : syms) { 910b57cec5SDimitry Andric switch (sym.kind()) { 920b57cec5SDimitry Andric case S_COMPILE3: 930b57cec5SDimitry Andric ParseCompile3(sym, item); 940b57cec5SDimitry Andric break; 950b57cec5SDimitry Andric case S_OBJNAME: 960b57cec5SDimitry Andric ParseObjname(sym, item); 970b57cec5SDimitry Andric break; 980b57cec5SDimitry Andric case S_BUILDINFO: 990b57cec5SDimitry Andric ParseBuildInfo(index, sym, item); 1000b57cec5SDimitry Andric break; 1010b57cec5SDimitry Andric default: 1020b57cec5SDimitry Andric continue; 1030b57cec5SDimitry Andric } 1040b57cec5SDimitry Andric if (++found >= 3) 1050b57cec5SDimitry Andric break; 1060b57cec5SDimitry Andric } 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric CompilandIndexItem::CompilandIndexItem( 1100b57cec5SDimitry Andric PdbCompilandId id, llvm::pdb::ModuleDebugStreamRef debug_stream, 1110b57cec5SDimitry Andric llvm::pdb::DbiModuleDescriptor descriptor) 1120b57cec5SDimitry Andric : m_id(id), m_debug_stream(std::move(debug_stream)), 1130b57cec5SDimitry Andric m_module_descriptor(std::move(descriptor)) {} 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric CompilandIndexItem &CompileUnitIndex::GetOrCreateCompiland(uint16_t modi) { 1160b57cec5SDimitry Andric auto result = m_comp_units.try_emplace(modi, nullptr); 1170b57cec5SDimitry Andric if (!result.second) 1180b57cec5SDimitry Andric return *result.first->second; 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric // Find the module list and load its debug information stream and cache it 1210b57cec5SDimitry Andric // since we need to use it for almost all interesting operations. 1220b57cec5SDimitry Andric const DbiModuleList &modules = m_index.dbi().modules(); 1230b57cec5SDimitry Andric llvm::pdb::DbiModuleDescriptor descriptor = modules.getModuleDescriptor(modi); 1240b57cec5SDimitry Andric uint16_t stream = descriptor.getModuleStreamIndex(); 1250b57cec5SDimitry Andric std::unique_ptr<llvm::msf::MappedBlockStream> stream_data = 1260b57cec5SDimitry Andric m_index.pdb().createIndexedStream(stream); 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric std::unique_ptr<CompilandIndexItem>& cci = result.first->second; 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric if (!stream_data) { 1320b57cec5SDimitry Andric llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor, nullptr); 1330b57cec5SDimitry Andric cci = llvm::make_unique<CompilandIndexItem>(PdbCompilandId{ modi }, debug_stream, std::move(descriptor)); 1340b57cec5SDimitry Andric return *cci; 1350b57cec5SDimitry Andric } 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor, 1380b57cec5SDimitry Andric std::move(stream_data)); 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric cantFail(debug_stream.reload()); 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric cci = llvm::make_unique<CompilandIndexItem>( 1430b57cec5SDimitry Andric PdbCompilandId{modi}, std::move(debug_stream), std::move(descriptor)); 1440b57cec5SDimitry Andric ParseExtendedInfo(m_index, *cci); 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric cci->m_strings.initialize(debug_stream.getSubsectionsArray()); 1470b57cec5SDimitry Andric PDBStringTable &strings = cantFail(m_index.pdb().getStringTable()); 1480b57cec5SDimitry Andric cci->m_strings.setStrings(strings.getStringTable()); 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric // We want the main source file to always comes first. Note that we can't 1510b57cec5SDimitry Andric // just push_back the main file onto the front because `GetMainSourceFile` 1520b57cec5SDimitry Andric // computes it in such a way that it doesn't own the resulting memory. So we 1530b57cec5SDimitry Andric // have to iterate the module file list comparing each one to the main file 1540b57cec5SDimitry Andric // name until we find it, and we can cache that one since the memory is backed 1550b57cec5SDimitry Andric // by a contiguous chunk inside the mapped PDB. 1560b57cec5SDimitry Andric llvm::SmallString<64> main_file = GetMainSourceFile(*cci); 1570b57cec5SDimitry Andric std::string s = main_file.str(); 1580b57cec5SDimitry Andric llvm::sys::path::native(main_file); 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric uint32_t file_count = modules.getSourceFileCount(modi); 1610b57cec5SDimitry Andric cci->m_file_list.reserve(file_count); 1620b57cec5SDimitry Andric bool found_main_file = false; 1630b57cec5SDimitry Andric for (llvm::StringRef file : modules.source_files(modi)) { 1640b57cec5SDimitry Andric if (!found_main_file && IsMainFile(main_file, file)) { 1650b57cec5SDimitry Andric cci->m_file_list.insert(cci->m_file_list.begin(), file); 1660b57cec5SDimitry Andric found_main_file = true; 1670b57cec5SDimitry Andric continue; 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric cci->m_file_list.push_back(file); 1700b57cec5SDimitry Andric } 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric return *cci; 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric const CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) const { 1760b57cec5SDimitry Andric auto iter = m_comp_units.find(modi); 1770b57cec5SDimitry Andric if (iter == m_comp_units.end()) 1780b57cec5SDimitry Andric return nullptr; 1790b57cec5SDimitry Andric return iter->second.get(); 1800b57cec5SDimitry Andric } 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) { 1830b57cec5SDimitry Andric auto iter = m_comp_units.find(modi); 1840b57cec5SDimitry Andric if (iter == m_comp_units.end()) 1850b57cec5SDimitry Andric return nullptr; 1860b57cec5SDimitry Andric return iter->second.get(); 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric llvm::SmallString<64> 1900b57cec5SDimitry Andric CompileUnitIndex::GetMainSourceFile(const CompilandIndexItem &item) const { 1910b57cec5SDimitry Andric // LF_BUILDINFO contains a list of arg indices which point to LF_STRING_ID 1920b57cec5SDimitry Andric // records in the IPI stream. The order of the arg indices is as follows: 1930b57cec5SDimitry Andric // [0] - working directory where compiler was invoked. 1940b57cec5SDimitry Andric // [1] - absolute path to compiler binary 1950b57cec5SDimitry Andric // [2] - source file name 1960b57cec5SDimitry Andric // [3] - path to compiler generated PDB (the /Zi PDB, although this entry gets 1970b57cec5SDimitry Andric // added even when using /Z7) 1980b57cec5SDimitry Andric // [4] - full command line invocation. 1990b57cec5SDimitry Andric // 2000b57cec5SDimitry Andric // We need to form the path [0]\[2] to generate the full path to the main 2010b57cec5SDimitry Andric // file.source 2020b57cec5SDimitry Andric if (item.m_build_info.size() < 3) 2030b57cec5SDimitry Andric return {""}; 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric LazyRandomTypeCollection &types = m_index.ipi().typeCollection(); 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric StringIdRecord working_dir; 2080b57cec5SDimitry Andric StringIdRecord file_name; 2090b57cec5SDimitry Andric CVType dir_cvt = types.getType(item.m_build_info[0]); 2100b57cec5SDimitry Andric CVType file_cvt = types.getType(item.m_build_info[2]); 2110b57cec5SDimitry Andric llvm::cantFail( 2120b57cec5SDimitry Andric TypeDeserializer::deserializeAs<StringIdRecord>(dir_cvt, working_dir)); 2130b57cec5SDimitry Andric llvm::cantFail( 2140b57cec5SDimitry Andric TypeDeserializer::deserializeAs<StringIdRecord>(file_cvt, file_name)); 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric llvm::sys::path::Style style = working_dir.String.startswith("/") 2170b57cec5SDimitry Andric ? llvm::sys::path::Style::posix 2180b57cec5SDimitry Andric : llvm::sys::path::Style::windows; 2190b57cec5SDimitry Andric if (llvm::sys::path::is_absolute(file_name.String, style)) 2200b57cec5SDimitry Andric return file_name.String; 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric llvm::SmallString<64> absolute_path = working_dir.String; 2230b57cec5SDimitry Andric llvm::sys::path::append(absolute_path, file_name.String); 2240b57cec5SDimitry Andric return absolute_path; 2250b57cec5SDimitry Andric } 226