1 //===-- CompileUnitIndex.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "CompileUnitIndex.h"
10 
11 #include "PdbIndex.h"
12 #include "PdbUtil.h"
13 
14 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
15 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
16 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
17 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
18 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
19 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
20 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
21 #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
22 #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
23 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
24 #include "llvm/Support/Path.h"
25 
26 #include "lldb/Utility/LLDBAssert.h"
27 
28 using namespace lldb;
29 using namespace lldb_private;
30 using namespace lldb_private::npdb;
31 using namespace llvm::codeview;
32 using namespace llvm::pdb;
33 
34 static bool IsMainFile(llvm::StringRef main, llvm::StringRef other) {
35   if (main == other)
36     return true;
37 
38   // If the files refer to the local file system, we can just ask the file
39   // system if they're equivalent.  But if the source isn't present on disk
40   // then we still want to try.
41   if (llvm::sys::fs::equivalent(main, other))
42     return true;
43 
44   llvm::SmallString<64> normalized(other);
45   llvm::sys::path::native(normalized);
46   return main.equals_insensitive(normalized);
47 }
48 
49 static void ParseCompile3(const CVSymbol &sym, CompilandIndexItem &cci) {
50   cci.m_compile_opts.emplace();
51   llvm::cantFail(
52       SymbolDeserializer::deserializeAs<Compile3Sym>(sym, *cci.m_compile_opts));
53 }
54 
55 static void ParseObjname(const CVSymbol &sym, CompilandIndexItem &cci) {
56   cci.m_obj_name.emplace();
57   llvm::cantFail(
58       SymbolDeserializer::deserializeAs<ObjNameSym>(sym, *cci.m_obj_name));
59 }
60 
61 static void ParseBuildInfo(PdbIndex &index, const CVSymbol &sym,
62                            CompilandIndexItem &cci) {
63   BuildInfoSym bis(SymbolRecordKind::BuildInfoSym);
64   llvm::cantFail(SymbolDeserializer::deserializeAs<BuildInfoSym>(sym, bis));
65 
66   // S_BUILDINFO just points to an LF_BUILDINFO in the IPI stream.  Let's do
67   // a little extra work to pull out the LF_BUILDINFO.
68   LazyRandomTypeCollection &types = index.ipi().typeCollection();
69   std::optional<CVType> cvt = types.tryGetType(bis.BuildId);
70 
71   if (!cvt || cvt->kind() != LF_BUILDINFO)
72     return;
73 
74   BuildInfoRecord bir;
75   llvm::cantFail(TypeDeserializer::deserializeAs<BuildInfoRecord>(*cvt, bir));
76   cci.m_build_info.assign(bir.ArgIndices.begin(), bir.ArgIndices.end());
77 }
78 
79 static void ParseExtendedInfo(PdbIndex &index, CompilandIndexItem &item) {
80   const CVSymbolArray &syms = item.m_debug_stream.getSymbolArray();
81 
82   // This is a private function, it shouldn't be called if the information
83   // has already been parsed.
84   lldbassert(!item.m_obj_name);
85   lldbassert(!item.m_compile_opts);
86   lldbassert(item.m_build_info.empty());
87 
88   // We're looking for 3 things.  S_COMPILE3, S_OBJNAME, and S_BUILDINFO.
89   int found = 0;
90   for (const CVSymbol &sym : syms) {
91     switch (sym.kind()) {
92     case S_COMPILE3:
93       ParseCompile3(sym, item);
94       break;
95     case S_OBJNAME:
96       ParseObjname(sym, item);
97       break;
98     case S_BUILDINFO:
99       ParseBuildInfo(index, sym, item);
100       break;
101     default:
102       continue;
103     }
104     if (++found >= 3)
105       break;
106   }
107 }
108 
109 static void ParseInlineeLineTableForCompileUnit(CompilandIndexItem &item) {
110   for (const auto &ss : item.m_debug_stream.getSubsectionsArray()) {
111     if (ss.kind() != DebugSubsectionKind::InlineeLines)
112       continue;
113 
114     DebugInlineeLinesSubsectionRef inlinee_lines;
115     llvm::BinaryStreamReader reader(ss.getRecordData());
116     if (llvm::Error error = inlinee_lines.initialize(reader)) {
117       consumeError(std::move(error));
118       continue;
119     }
120 
121     for (const InlineeSourceLine &Line : inlinee_lines) {
122       item.m_inline_map[Line.Header->Inlinee] = Line;
123     }
124   }
125 }
126 
127 CompilandIndexItem::CompilandIndexItem(
128     PdbCompilandId id, llvm::pdb::ModuleDebugStreamRef debug_stream,
129     llvm::pdb::DbiModuleDescriptor descriptor)
130     : m_id(id), m_debug_stream(std::move(debug_stream)),
131       m_module_descriptor(std::move(descriptor)) {}
132 
133 CompilandIndexItem &CompileUnitIndex::GetOrCreateCompiland(uint16_t modi) {
134   auto result = m_comp_units.try_emplace(modi, nullptr);
135   if (!result.second)
136     return *result.first->second;
137 
138   // Find the module list and load its debug information stream and cache it
139   // since we need to use it for almost all interesting operations.
140   const DbiModuleList &modules = m_index.dbi().modules();
141   llvm::pdb::DbiModuleDescriptor descriptor = modules.getModuleDescriptor(modi);
142   uint16_t stream = descriptor.getModuleStreamIndex();
143   std::unique_ptr<llvm::msf::MappedBlockStream> stream_data =
144       m_index.pdb().createIndexedStream(stream);
145 
146 
147   std::unique_ptr<CompilandIndexItem>& cci = result.first->second;
148 
149   if (!stream_data) {
150     llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor, nullptr);
151     cci = std::make_unique<CompilandIndexItem>(PdbCompilandId{ modi }, debug_stream, std::move(descriptor));
152     return *cci;
153   }
154 
155   llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor,
156                                                std::move(stream_data));
157 
158   cantFail(debug_stream.reload());
159 
160   cci = std::make_unique<CompilandIndexItem>(
161       PdbCompilandId{modi}, std::move(debug_stream), std::move(descriptor));
162   ParseExtendedInfo(m_index, *cci);
163   ParseInlineeLineTableForCompileUnit(*cci);
164 
165   auto strings = m_index.pdb().getStringTable();
166   if (strings) {
167     cci->m_strings.initialize(cci->m_debug_stream.getSubsectionsArray());
168     cci->m_strings.setStrings(strings->getStringTable());
169   } else {
170     consumeError(strings.takeError());
171   }
172 
173   // We want the main source file to always comes first.  Note that we can't
174   // just push_back the main file onto the front because `GetMainSourceFile`
175   // computes it in such a way that it doesn't own the resulting memory.  So we
176   // have to iterate the module file list comparing each one to the main file
177   // name until we find it, and we can cache that one since the memory is backed
178   // by a contiguous chunk inside the mapped PDB.
179   llvm::SmallString<64> main_file = GetMainSourceFile(*cci);
180   std::string s = std::string(main_file.str());
181   llvm::sys::path::native(main_file);
182 
183   uint32_t file_count = modules.getSourceFileCount(modi);
184   cci->m_file_list.reserve(file_count);
185   bool found_main_file = false;
186   for (llvm::StringRef file : modules.source_files(modi)) {
187     if (!found_main_file && IsMainFile(main_file, file)) {
188       cci->m_file_list.insert(cci->m_file_list.begin(), file);
189       found_main_file = true;
190       continue;
191     }
192     cci->m_file_list.push_back(file);
193   }
194 
195   return *cci;
196 }
197 
198 const CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) const {
199   auto iter = m_comp_units.find(modi);
200   if (iter == m_comp_units.end())
201     return nullptr;
202   return iter->second.get();
203 }
204 
205 CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) {
206   auto iter = m_comp_units.find(modi);
207   if (iter == m_comp_units.end())
208     return nullptr;
209   return iter->second.get();
210 }
211 
212 llvm::SmallString<64>
213 CompileUnitIndex::GetMainSourceFile(const CompilandIndexItem &item) const {
214   // LF_BUILDINFO contains a list of arg indices which point to LF_STRING_ID
215   // records in the IPI stream.  The order of the arg indices is as follows:
216   // [0] - working directory where compiler was invoked.
217   // [1] - absolute path to compiler binary
218   // [2] - source file name
219   // [3] - path to compiler generated PDB (the /Zi PDB, although this entry gets
220   //       added even when using /Z7)
221   // [4] - full command line invocation.
222   //
223   // We need to form the path [0]\[2] to generate the full path to the main
224   // file.source
225   if (item.m_build_info.size() < 3)
226     return {""};
227 
228   LazyRandomTypeCollection &types = m_index.ipi().typeCollection();
229 
230   StringIdRecord working_dir;
231   StringIdRecord file_name;
232   CVType dir_cvt = types.getType(item.m_build_info[0]);
233   CVType file_cvt = types.getType(item.m_build_info[2]);
234   llvm::cantFail(
235       TypeDeserializer::deserializeAs<StringIdRecord>(dir_cvt, working_dir));
236   llvm::cantFail(
237       TypeDeserializer::deserializeAs<StringIdRecord>(file_cvt, file_name));
238 
239   llvm::sys::path::Style style = working_dir.String.startswith("/")
240                                      ? llvm::sys::path::Style::posix
241                                      : llvm::sys::path::Style::windows;
242   if (llvm::sys::path::is_absolute(file_name.String, style))
243     return file_name.String;
244 
245   llvm::SmallString<64> absolute_path = working_dir.String;
246   llvm::sys::path::append(absolute_path, file_name.String);
247   return absolute_path;
248 }
249