1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/DebugInfo/PDB/Native/InputFile.h"
10
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/BinaryFormat/Magic.h"
13 #include "llvm/DebugInfo/CodeView/CodeView.h"
14 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
15 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
16 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
17 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
18 #include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
19 #include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
23 #include "llvm/DebugInfo/PDB/Native/RawError.h"
24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
25 #include "llvm/DebugInfo/PDB/PDB.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/FormatVariadic.h"
29
30 using namespace llvm;
31 using namespace llvm::codeview;
32 using namespace llvm::object;
33 using namespace llvm::pdb;
34
// The default constructor and the destructor are both compiler-generated, but
// they are defined out of line here rather than in the class definition.
InputFile::InputFile() = default;
InputFile::~InputFile() = default;
37
38 Expected<ModuleDebugStreamRef>
getModuleDebugStream(PDBFile & File,StringRef & ModuleName,uint32_t Index)39 llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,
40 uint32_t Index) {
41 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
42 if (!DbiOrErr)
43 return DbiOrErr.takeError();
44 DbiStream &Dbi = *DbiOrErr;
45 const auto &Modules = Dbi.modules();
46 if (Index >= Modules.getModuleCount())
47 return make_error<RawError>(raw_error_code::index_out_of_bounds,
48 "Invalid module index");
49
50 auto Modi = Modules.getModuleDescriptor(Index);
51
52 ModuleName = Modi.getModuleName();
53
54 uint16_t ModiStream = Modi.getModuleStreamIndex();
55 if (ModiStream == kInvalidStreamIndex)
56 return make_error<RawError>(raw_error_code::no_stream,
57 "Module stream not present");
58
59 auto ModStreamData = File.createIndexedStream(ModiStream);
60
61 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
62 if (auto EC = ModS.reload())
63 return make_error<RawError>(raw_error_code::corrupt_file,
64 "Invalid module stream");
65
66 return std::move(ModS);
67 }
68
getModuleDebugStream(PDBFile & File,uint32_t Index)69 Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,
70 uint32_t Index) {
71 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
72 if (!DbiOrErr)
73 return DbiOrErr.takeError();
74 DbiStream &Dbi = *DbiOrErr;
75 const auto &Modules = Dbi.modules();
76 auto Modi = Modules.getModuleDescriptor(Index);
77
78 uint16_t ModiStream = Modi.getModuleStreamIndex();
79 if (ModiStream == kInvalidStreamIndex)
80 return make_error<RawError>(raw_error_code::no_stream,
81 "Module stream not present");
82
83 auto ModStreamData = File.createIndexedStream(ModiStream);
84
85 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
86 if (Error Err = ModS.reload())
87 return make_error<RawError>(raw_error_code::corrupt_file,
88 "Invalid module stream");
89
90 return std::move(ModS);
91 }
92
isCodeViewDebugSubsection(object::SectionRef Section,StringRef Name,BinaryStreamReader & Reader)93 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
94 StringRef Name,
95 BinaryStreamReader &Reader) {
96 if (Expected<StringRef> NameOrErr = Section.getName()) {
97 if (*NameOrErr != Name)
98 return false;
99 } else {
100 consumeError(NameOrErr.takeError());
101 return false;
102 }
103
104 Expected<StringRef> ContentsOrErr = Section.getContents();
105 if (!ContentsOrErr) {
106 consumeError(ContentsOrErr.takeError());
107 return false;
108 }
109
110 Reader = BinaryStreamReader(*ContentsOrErr, llvm::endianness::little);
111 uint32_t Magic;
112 if (Reader.bytesRemaining() < sizeof(uint32_t))
113 return false;
114 cantFail(Reader.readInteger(Magic));
115 if (Magic != COFF::DEBUG_SECTION_MAGIC)
116 return false;
117 return true;
118 }
119
isDebugSSection(object::SectionRef Section,DebugSubsectionArray & Subsections)120 static inline bool isDebugSSection(object::SectionRef Section,
121 DebugSubsectionArray &Subsections) {
122 BinaryStreamReader Reader;
123 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
124 return false;
125
126 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
127 return true;
128 }
129
isDebugTSection(SectionRef Section,CVTypeArray & Types)130 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
131 BinaryStreamReader Reader;
132 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
133 !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
134 return false;
135 cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
136 return true;
137 }
138
formatChecksumKind(FileChecksumKind Kind)139 static std::string formatChecksumKind(FileChecksumKind Kind) {
140 switch (Kind) {
141 RETURN_CASE(FileChecksumKind, None, "None");
142 RETURN_CASE(FileChecksumKind, MD5, "MD5");
143 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
144 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
145 }
146 return formatUnknownEnum(Kind);
147 }
148
149 template <typename... Args>
formatInternal(LinePrinter & Printer,bool Append,Args &&...args)150 static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
151 if (Append)
152 Printer.format(std::forward<Args>(args)...);
153 else
154 Printer.formatLine(std::forward<Args>(args)...);
155 }
156
SymbolGroup(InputFile * File,uint32_t GroupIndex)157 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
158 if (!File)
159 return;
160
161 if (File->isPdb())
162 initializeForPdb(GroupIndex);
163 else {
164 Name = ".debug$S";
165 uint32_t I = 0;
166 for (const auto &S : File->obj().sections()) {
167 DebugSubsectionArray SS;
168 if (!isDebugSSection(S, SS))
169 continue;
170
171 if (!SC.hasChecksums() || !SC.hasStrings())
172 SC.initialize(SS);
173
174 if (I == GroupIndex)
175 Subsections = SS;
176
177 if (SC.hasChecksums() && SC.hasStrings())
178 break;
179 }
180 rebuildChecksumMap();
181 }
182 }
183
name() const184 StringRef SymbolGroup::name() const { return Name; }
185
updateDebugS(const codeview::DebugSubsectionArray & SS)186 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
187 Subsections = SS;
188 }
189
updatePdbModi(uint32_t Modi)190 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
191
initializeForPdb(uint32_t Modi)192 void SymbolGroup::initializeForPdb(uint32_t Modi) {
193 assert(File && File->isPdb());
194
195 // PDB always uses the same string table, but each module has its own
196 // checksums. So we only set the strings if they're not already set.
197 if (!SC.hasStrings()) {
198 auto StringTable = File->pdb().getStringTable();
199 if (StringTable)
200 SC.setStrings(StringTable->getStringTable());
201 else
202 consumeError(StringTable.takeError());
203 }
204
205 SC.resetChecksums();
206 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
207 if (!MDS) {
208 consumeError(MDS.takeError());
209 return;
210 }
211
212 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
213 Subsections = DebugStream->getSubsectionsArray();
214 SC.initialize(Subsections);
215 rebuildChecksumMap();
216 }
217
rebuildChecksumMap()218 void SymbolGroup::rebuildChecksumMap() {
219 if (!SC.hasChecksums())
220 return;
221
222 for (const auto &Entry : SC.checksums()) {
223 auto S = SC.strings().getString(Entry.FileNameOffset);
224 if (!S)
225 continue;
226 ChecksumsByFile[*S] = Entry;
227 }
228 }
229
getPdbModuleStream() const230 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
231 assert(File && File->isPdb() && DebugStream);
232 return *DebugStream;
233 }
234
getNameFromStringTable(uint32_t Offset) const235 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
236 return SC.strings().getString(Offset);
237 }
238
getNameFromChecksums(uint32_t Offset) const239 Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {
240 StringRef Name;
241 if (!SC.hasChecksums()) {
242 return std::move(Name);
243 }
244
245 auto Iter = SC.checksums().getArray().at(Offset);
246 if (Iter == SC.checksums().getArray().end()) {
247 return std::move(Name);
248 }
249
250 uint32_t FO = Iter->FileNameOffset;
251 auto ExpectedFile = getNameFromStringTable(FO);
252 if (!ExpectedFile) {
253 return std::move(Name);
254 }
255
256 return *ExpectedFile;
257 }
258
formatFromFileName(LinePrinter & Printer,StringRef File,bool Append) const259 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
260 bool Append) const {
261 auto FC = ChecksumsByFile.find(File);
262 if (FC == ChecksumsByFile.end()) {
263 formatInternal(Printer, Append, "- (no checksum) {0}", File);
264 return;
265 }
266
267 formatInternal(Printer, Append, "- ({0}: {1}) {2}",
268 formatChecksumKind(FC->getValue().Kind),
269 toHex(FC->getValue().Checksum), File);
270 }
271
formatFromChecksumsOffset(LinePrinter & Printer,uint32_t Offset,bool Append) const272 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
273 uint32_t Offset,
274 bool Append) const {
275 if (!SC.hasChecksums()) {
276 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
277 return;
278 }
279
280 auto Iter = SC.checksums().getArray().at(Offset);
281 if (Iter == SC.checksums().getArray().end()) {
282 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
283 return;
284 }
285
286 uint32_t FO = Iter->FileNameOffset;
287 auto ExpectedFile = getNameFromStringTable(FO);
288 if (!ExpectedFile) {
289 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
290 consumeError(ExpectedFile.takeError());
291 return;
292 }
293 if (Iter->Kind == FileChecksumKind::None) {
294 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
295 } else {
296 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
297 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
298 }
299 }
300
open(StringRef Path,bool AllowUnknownFile)301 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
302 InputFile IF;
303 if (!llvm::sys::fs::exists(Path))
304 return make_error<StringError>(formatv("File {0} not found", Path),
305 inconvertibleErrorCode());
306
307 file_magic Magic;
308 if (auto EC = identify_magic(Path, Magic))
309 return make_error<StringError>(
310 formatv("Unable to identify file type for file {0}", Path), EC);
311
312 if (Magic == file_magic::coff_object) {
313 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
314 if (!BinaryOrErr)
315 return BinaryOrErr.takeError();
316
317 IF.CoffObject = std::move(*BinaryOrErr);
318 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
319 return std::move(IF);
320 }
321
322 if (Magic == file_magic::pdb) {
323 std::unique_ptr<IPDBSession> Session;
324 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
325 return std::move(Err);
326
327 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
328 IF.PdbOrObj = &IF.PdbSession->getPDBFile();
329
330 return std::move(IF);
331 }
332
333 if (!AllowUnknownFile)
334 return make_error<StringError>(
335 formatv("File {0} is not a supported file type", Path),
336 inconvertibleErrorCode());
337
338 auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
339 /*RequiresNullTerminator=*/false);
340 if (!Result)
341 return make_error<StringError>(
342 formatv("File {0} could not be opened", Path), Result.getError());
343
344 IF.UnknownFile = std::move(*Result);
345 IF.PdbOrObj = IF.UnknownFile.get();
346 return std::move(IF);
347 }
348
pdb()349 PDBFile &InputFile::pdb() {
350 assert(isPdb());
351 return *cast<PDBFile *>(PdbOrObj);
352 }
353
pdb() const354 const PDBFile &InputFile::pdb() const {
355 assert(isPdb());
356 return *cast<PDBFile *>(PdbOrObj);
357 }
358
obj()359 object::COFFObjectFile &InputFile::obj() {
360 assert(isObj());
361 return *cast<object::COFFObjectFile *>(PdbOrObj);
362 }
363
obj() const364 const object::COFFObjectFile &InputFile::obj() const {
365 assert(isObj());
366 return *cast<object::COFFObjectFile *>(PdbOrObj);
367 }
368
unknown()369 MemoryBuffer &InputFile::unknown() {
370 assert(isUnknown());
371 return *cast<MemoryBuffer *>(PdbOrObj);
372 }
373
unknown() const374 const MemoryBuffer &InputFile::unknown() const {
375 assert(isUnknown());
376 return *cast<MemoryBuffer *>(PdbOrObj);
377 }
378
getFilePath() const379 StringRef InputFile::getFilePath() const {
380 if (isPdb())
381 return pdb().getFilePath();
382 if (isObj())
383 return obj().getFileName();
384 assert(isUnknown());
385 return unknown().getBufferIdentifier();
386 }
387
hasTypes() const388 bool InputFile::hasTypes() const {
389 if (isPdb())
390 return pdb().hasPDBTpiStream();
391
392 for (const auto &Section : obj().sections()) {
393 CVTypeArray Types;
394 if (isDebugTSection(Section, Types))
395 return true;
396 }
397 return false;
398 }
399
hasIds() const400 bool InputFile::hasIds() const {
401 if (isObj())
402 return false;
403 return pdb().hasPDBIpiStream();
404 }
405
isPdb() const406 bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); }
407
isObj() const408 bool InputFile::isObj() const {
409 return isa<object::COFFObjectFile *>(PdbOrObj);
410 }
411
isUnknown() const412 bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); }
413
414 codeview::LazyRandomTypeCollection &
getOrCreateTypeCollection(TypeCollectionKind Kind)415 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
416 if (Types && Kind == kTypes)
417 return *Types;
418 if (Ids && Kind == kIds)
419 return *Ids;
420
421 if (Kind == kIds) {
422 assert(isPdb() && pdb().hasPDBIpiStream());
423 }
424
425 // If the collection was already initialized, we should have just returned it
426 // in step 1.
427 if (isPdb()) {
428 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
429 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
430 : pdb().getPDBTpiStream());
431
432 auto &Array = Stream.typeArray();
433 uint32_t Count = Stream.getNumTypeRecords();
434 auto Offsets = Stream.getTypeIndexOffsets();
435 Collection =
436 std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
437 return *Collection;
438 }
439
440 assert(isObj());
441 assert(Kind == kTypes);
442 assert(!Types);
443
444 for (const auto &Section : obj().sections()) {
445 CVTypeArray Records;
446 if (!isDebugTSection(Section, Records))
447 continue;
448
449 Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
450 return *Types;
451 }
452
453 Types = std::make_unique<LazyRandomTypeCollection>(100);
454 return *Types;
455 }
456
types()457 codeview::LazyRandomTypeCollection &InputFile::types() {
458 return getOrCreateTypeCollection(kTypes);
459 }
460
ids()461 codeview::LazyRandomTypeCollection &InputFile::ids() {
462 // Object files have only one type stream that contains both types and ids.
463 // Similarly, some PDBs don't contain an IPI stream, and for those both types
464 // and IDs are in the same stream.
465 if (isObj() || !pdb().hasPDBIpiStream())
466 return types();
467
468 return getOrCreateTypeCollection(kIds);
469 }
470
symbol_groups()471 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
472 return make_range<SymbolGroupIterator>(symbol_groups_begin(),
473 symbol_groups_end());
474 }
475
symbol_groups_begin()476 SymbolGroupIterator InputFile::symbol_groups_begin() {
477 return SymbolGroupIterator(*this);
478 }
479
symbol_groups_end()480 SymbolGroupIterator InputFile::symbol_groups_end() {
481 return SymbolGroupIterator();
482 }
483
SymbolGroupIterator()484 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
485
SymbolGroupIterator(InputFile & File)486 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
487 if (File.isObj()) {
488 SectionIter = File.obj().section_begin();
489 scanToNextDebugS();
490 }
491 }
492
operator ==(const SymbolGroupIterator & R) const493 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
494 bool E = isEnd();
495 bool RE = R.isEnd();
496 if (E || RE)
497 return E == RE;
498
499 if (Value.File != R.Value.File)
500 return false;
501 return Index == R.Index;
502 }
503
operator *() const504 const SymbolGroup &SymbolGroupIterator::operator*() const {
505 assert(!isEnd());
506 return Value;
507 }
operator *()508 SymbolGroup &SymbolGroupIterator::operator*() {
509 assert(!isEnd());
510 return Value;
511 }
512
operator ++()513 SymbolGroupIterator &SymbolGroupIterator::operator++() {
514 assert(Value.File && !isEnd());
515 ++Index;
516 if (isEnd())
517 return *this;
518
519 if (Value.File->isPdb()) {
520 Value.updatePdbModi(Index);
521 return *this;
522 }
523
524 scanToNextDebugS();
525 return *this;
526 }
527
scanToNextDebugS()528 void SymbolGroupIterator::scanToNextDebugS() {
529 assert(SectionIter);
530 auto End = Value.File->obj().section_end();
531 auto &Iter = *SectionIter;
532 assert(!isEnd());
533
534 while (++Iter != End) {
535 DebugSubsectionArray SS;
536 SectionRef SR = *Iter;
537 if (!isDebugSSection(SR, SS))
538 continue;
539
540 Value.updateDebugS(SS);
541 return;
542 }
543 }
544
isEnd() const545 bool SymbolGroupIterator::isEnd() const {
546 if (!Value.File)
547 return true;
548 if (Value.File->isPdb()) {
549 DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
550 uint32_t Count = Dbi.modules().getModuleCount();
551 assert(Index <= Count);
552 return Index == Count;
553 }
554
555 assert(SectionIter);
556 return *SectionIter == Value.File->obj().section_end();
557 }
558
isMyCode(const SymbolGroup & Group)559 static bool isMyCode(const SymbolGroup &Group) {
560 if (Group.getFile().isObj())
561 return true;
562
563 StringRef Name = Group.name();
564 if (Name.starts_with("Import:"))
565 return false;
566 if (Name.ends_with_insensitive(".dll"))
567 return false;
568 if (Name.equals_insensitive("* linker *"))
569 return false;
570 if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools"))
571 return false;
572 if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt"))
573 return false;
574 return true;
575 }
576
shouldDumpSymbolGroup(uint32_t Idx,const SymbolGroup & Group,const FilterOptions & Filters)577 bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
578 const FilterOptions &Filters) {
579 if (Filters.JustMyCode && !isMyCode(Group))
580 return false;
581
582 // If the arg was not specified on the command line, always dump all modules.
583 if (!Filters.DumpModi)
584 return true;
585
586 // Otherwise, only dump if this is the same module specified.
587 return (Filters.DumpModi == Idx);
588 }
589