//===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9 #include "llvm/DebugInfo/PDB/Native/InputFile.h"
10
11 #include "llvm/BinaryFormat/Magic.h"
12 #include "llvm/DebugInfo/CodeView/CodeView.h"
13 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
14 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
15 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
16 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
17 #include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
18 #include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
19 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
20 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
22 #include "llvm/DebugInfo/PDB/Native/RawError.h"
23 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
24 #include "llvm/DebugInfo/PDB/PDB.h"
25 #include "llvm/Object/COFF.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/FormatVariadic.h"
28
29 using namespace llvm;
30 using namespace llvm::codeview;
31 using namespace llvm::object;
32 using namespace llvm::pdb;
33
34 InputFile::InputFile() = default;
35 InputFile::~InputFile() = default;
36
37 Expected<ModuleDebugStreamRef>
getModuleDebugStream(PDBFile & File,StringRef & ModuleName,uint32_t Index)38 llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,
39 uint32_t Index) {
40 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
41 if (!DbiOrErr)
42 return DbiOrErr.takeError();
43 DbiStream &Dbi = *DbiOrErr;
44 const auto &Modules = Dbi.modules();
45 if (Index >= Modules.getModuleCount())
46 return make_error<RawError>(raw_error_code::index_out_of_bounds,
47 "Invalid module index");
48
49 auto Modi = Modules.getModuleDescriptor(Index);
50
51 ModuleName = Modi.getModuleName();
52
53 uint16_t ModiStream = Modi.getModuleStreamIndex();
54 if (ModiStream == kInvalidStreamIndex)
55 return make_error<RawError>(raw_error_code::no_stream,
56 "Module stream not present");
57
58 auto ModStreamData = File.createIndexedStream(ModiStream);
59
60 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
61 if (auto EC = ModS.reload())
62 return make_error<RawError>(raw_error_code::corrupt_file,
63 "Invalid module stream");
64
65 return std::move(ModS);
66 }
67
getModuleDebugStream(PDBFile & File,uint32_t Index)68 Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,
69 uint32_t Index) {
70 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
71 if (!DbiOrErr)
72 return DbiOrErr.takeError();
73 DbiStream &Dbi = *DbiOrErr;
74 const auto &Modules = Dbi.modules();
75 auto Modi = Modules.getModuleDescriptor(Index);
76
77 uint16_t ModiStream = Modi.getModuleStreamIndex();
78 if (ModiStream == kInvalidStreamIndex)
79 return make_error<RawError>(raw_error_code::no_stream,
80 "Module stream not present");
81
82 auto ModStreamData = File.createIndexedStream(ModiStream);
83
84 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
85 if (Error Err = ModS.reload())
86 return make_error<RawError>(raw_error_code::corrupt_file,
87 "Invalid module stream");
88
89 return std::move(ModS);
90 }
91
isCodeViewDebugSubsection(object::SectionRef Section,StringRef Name,BinaryStreamReader & Reader)92 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
93 StringRef Name,
94 BinaryStreamReader &Reader) {
95 if (Expected<StringRef> NameOrErr = Section.getName()) {
96 if (*NameOrErr != Name)
97 return false;
98 } else {
99 consumeError(NameOrErr.takeError());
100 return false;
101 }
102
103 Expected<StringRef> ContentsOrErr = Section.getContents();
104 if (!ContentsOrErr) {
105 consumeError(ContentsOrErr.takeError());
106 return false;
107 }
108
109 Reader = BinaryStreamReader(*ContentsOrErr, support::little);
110 uint32_t Magic;
111 if (Reader.bytesRemaining() < sizeof(uint32_t))
112 return false;
113 cantFail(Reader.readInteger(Magic));
114 if (Magic != COFF::DEBUG_SECTION_MAGIC)
115 return false;
116 return true;
117 }
118
isDebugSSection(object::SectionRef Section,DebugSubsectionArray & Subsections)119 static inline bool isDebugSSection(object::SectionRef Section,
120 DebugSubsectionArray &Subsections) {
121 BinaryStreamReader Reader;
122 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
123 return false;
124
125 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
126 return true;
127 }
128
isDebugTSection(SectionRef Section,CVTypeArray & Types)129 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
130 BinaryStreamReader Reader;
131 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
132 !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
133 return false;
134 cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
135 return true;
136 }
137
formatChecksumKind(FileChecksumKind Kind)138 static std::string formatChecksumKind(FileChecksumKind Kind) {
139 switch (Kind) {
140 RETURN_CASE(FileChecksumKind, None, "None");
141 RETURN_CASE(FileChecksumKind, MD5, "MD5");
142 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
143 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
144 }
145 return formatUnknownEnum(Kind);
146 }
147
148 template <typename... Args>
formatInternal(LinePrinter & Printer,bool Append,Args &&...args)149 static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
150 if (Append)
151 Printer.format(std::forward<Args>(args)...);
152 else
153 Printer.formatLine(std::forward<Args>(args)...);
154 }
155
SymbolGroup(InputFile * File,uint32_t GroupIndex)156 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
157 if (!File)
158 return;
159
160 if (File->isPdb())
161 initializeForPdb(GroupIndex);
162 else {
163 Name = ".debug$S";
164 uint32_t I = 0;
165 for (const auto &S : File->obj().sections()) {
166 DebugSubsectionArray SS;
167 if (!isDebugSSection(S, SS))
168 continue;
169
170 if (!SC.hasChecksums() || !SC.hasStrings())
171 SC.initialize(SS);
172
173 if (I == GroupIndex)
174 Subsections = SS;
175
176 if (SC.hasChecksums() && SC.hasStrings())
177 break;
178 }
179 rebuildChecksumMap();
180 }
181 }
182
name() const183 StringRef SymbolGroup::name() const { return Name; }
184
updateDebugS(const codeview::DebugSubsectionArray & SS)185 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
186 Subsections = SS;
187 }
188
updatePdbModi(uint32_t Modi)189 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
190
initializeForPdb(uint32_t Modi)191 void SymbolGroup::initializeForPdb(uint32_t Modi) {
192 assert(File && File->isPdb());
193
194 // PDB always uses the same string table, but each module has its own
195 // checksums. So we only set the strings if they're not already set.
196 if (!SC.hasStrings()) {
197 auto StringTable = File->pdb().getStringTable();
198 if (StringTable)
199 SC.setStrings(StringTable->getStringTable());
200 else
201 consumeError(StringTable.takeError());
202 }
203
204 SC.resetChecksums();
205 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
206 if (!MDS) {
207 consumeError(MDS.takeError());
208 return;
209 }
210
211 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
212 Subsections = DebugStream->getSubsectionsArray();
213 SC.initialize(Subsections);
214 rebuildChecksumMap();
215 }
216
rebuildChecksumMap()217 void SymbolGroup::rebuildChecksumMap() {
218 if (!SC.hasChecksums())
219 return;
220
221 for (const auto &Entry : SC.checksums()) {
222 auto S = SC.strings().getString(Entry.FileNameOffset);
223 if (!S)
224 continue;
225 ChecksumsByFile[*S] = Entry;
226 }
227 }
228
getPdbModuleStream() const229 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
230 assert(File && File->isPdb() && DebugStream);
231 return *DebugStream;
232 }
233
getNameFromStringTable(uint32_t Offset) const234 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
235 return SC.strings().getString(Offset);
236 }
237
getNameFromChecksums(uint32_t Offset) const238 Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {
239 StringRef Name;
240 if (!SC.hasChecksums()) {
241 return std::move(Name);
242 }
243
244 auto Iter = SC.checksums().getArray().at(Offset);
245 if (Iter == SC.checksums().getArray().end()) {
246 return std::move(Name);
247 }
248
249 uint32_t FO = Iter->FileNameOffset;
250 auto ExpectedFile = getNameFromStringTable(FO);
251 if (!ExpectedFile) {
252 return std::move(Name);
253 }
254
255 return *ExpectedFile;
256 }
257
formatFromFileName(LinePrinter & Printer,StringRef File,bool Append) const258 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
259 bool Append) const {
260 auto FC = ChecksumsByFile.find(File);
261 if (FC == ChecksumsByFile.end()) {
262 formatInternal(Printer, Append, "- (no checksum) {0}", File);
263 return;
264 }
265
266 formatInternal(Printer, Append, "- ({0}: {1}) {2}",
267 formatChecksumKind(FC->getValue().Kind),
268 toHex(FC->getValue().Checksum), File);
269 }
270
formatFromChecksumsOffset(LinePrinter & Printer,uint32_t Offset,bool Append) const271 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
272 uint32_t Offset,
273 bool Append) const {
274 if (!SC.hasChecksums()) {
275 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
276 return;
277 }
278
279 auto Iter = SC.checksums().getArray().at(Offset);
280 if (Iter == SC.checksums().getArray().end()) {
281 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
282 return;
283 }
284
285 uint32_t FO = Iter->FileNameOffset;
286 auto ExpectedFile = getNameFromStringTable(FO);
287 if (!ExpectedFile) {
288 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
289 consumeError(ExpectedFile.takeError());
290 return;
291 }
292 if (Iter->Kind == FileChecksumKind::None) {
293 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
294 } else {
295 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
296 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
297 }
298 }
299
open(StringRef Path,bool AllowUnknownFile)300 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
301 InputFile IF;
302 if (!llvm::sys::fs::exists(Path))
303 return make_error<StringError>(formatv("File {0} not found", Path),
304 inconvertibleErrorCode());
305
306 file_magic Magic;
307 if (auto EC = identify_magic(Path, Magic))
308 return make_error<StringError>(
309 formatv("Unable to identify file type for file {0}", Path), EC);
310
311 if (Magic == file_magic::coff_object) {
312 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
313 if (!BinaryOrErr)
314 return BinaryOrErr.takeError();
315
316 IF.CoffObject = std::move(*BinaryOrErr);
317 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
318 return std::move(IF);
319 }
320
321 if (Magic == file_magic::pdb) {
322 std::unique_ptr<IPDBSession> Session;
323 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
324 return std::move(Err);
325
326 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
327 IF.PdbOrObj = &IF.PdbSession->getPDBFile();
328
329 return std::move(IF);
330 }
331
332 if (!AllowUnknownFile)
333 return make_error<StringError>(
334 formatv("File {0} is not a supported file type", Path),
335 inconvertibleErrorCode());
336
337 auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
338 /*RequiresNullTerminator=*/false);
339 if (!Result)
340 return make_error<StringError>(
341 formatv("File {0} could not be opened", Path), Result.getError());
342
343 IF.UnknownFile = std::move(*Result);
344 IF.PdbOrObj = IF.UnknownFile.get();
345 return std::move(IF);
346 }
347
pdb()348 PDBFile &InputFile::pdb() {
349 assert(isPdb());
350 return *PdbOrObj.get<PDBFile *>();
351 }
352
pdb() const353 const PDBFile &InputFile::pdb() const {
354 assert(isPdb());
355 return *PdbOrObj.get<PDBFile *>();
356 }
357
obj()358 object::COFFObjectFile &InputFile::obj() {
359 assert(isObj());
360 return *PdbOrObj.get<object::COFFObjectFile *>();
361 }
362
obj() const363 const object::COFFObjectFile &InputFile::obj() const {
364 assert(isObj());
365 return *PdbOrObj.get<object::COFFObjectFile *>();
366 }
367
unknown()368 MemoryBuffer &InputFile::unknown() {
369 assert(isUnknown());
370 return *PdbOrObj.get<MemoryBuffer *>();
371 }
372
unknown() const373 const MemoryBuffer &InputFile::unknown() const {
374 assert(isUnknown());
375 return *PdbOrObj.get<MemoryBuffer *>();
376 }
377
getFilePath() const378 StringRef InputFile::getFilePath() const {
379 if (isPdb())
380 return pdb().getFilePath();
381 if (isObj())
382 return obj().getFileName();
383 assert(isUnknown());
384 return unknown().getBufferIdentifier();
385 }
386
hasTypes() const387 bool InputFile::hasTypes() const {
388 if (isPdb())
389 return pdb().hasPDBTpiStream();
390
391 for (const auto &Section : obj().sections()) {
392 CVTypeArray Types;
393 if (isDebugTSection(Section, Types))
394 return true;
395 }
396 return false;
397 }
398
hasIds() const399 bool InputFile::hasIds() const {
400 if (isObj())
401 return false;
402 return pdb().hasPDBIpiStream();
403 }
404
isPdb() const405 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
406
isObj() const407 bool InputFile::isObj() const {
408 return PdbOrObj.is<object::COFFObjectFile *>();
409 }
410
isUnknown() const411 bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
412
413 codeview::LazyRandomTypeCollection &
getOrCreateTypeCollection(TypeCollectionKind Kind)414 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
415 if (Types && Kind == kTypes)
416 return *Types;
417 if (Ids && Kind == kIds)
418 return *Ids;
419
420 if (Kind == kIds) {
421 assert(isPdb() && pdb().hasPDBIpiStream());
422 }
423
424 // If the collection was already initialized, we should have just returned it
425 // in step 1.
426 if (isPdb()) {
427 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
428 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
429 : pdb().getPDBTpiStream());
430
431 auto &Array = Stream.typeArray();
432 uint32_t Count = Stream.getNumTypeRecords();
433 auto Offsets = Stream.getTypeIndexOffsets();
434 Collection =
435 std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
436 return *Collection;
437 }
438
439 assert(isObj());
440 assert(Kind == kTypes);
441 assert(!Types);
442
443 for (const auto &Section : obj().sections()) {
444 CVTypeArray Records;
445 if (!isDebugTSection(Section, Records))
446 continue;
447
448 Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
449 return *Types;
450 }
451
452 Types = std::make_unique<LazyRandomTypeCollection>(100);
453 return *Types;
454 }
455
types()456 codeview::LazyRandomTypeCollection &InputFile::types() {
457 return getOrCreateTypeCollection(kTypes);
458 }
459
ids()460 codeview::LazyRandomTypeCollection &InputFile::ids() {
461 // Object files have only one type stream that contains both types and ids.
462 // Similarly, some PDBs don't contain an IPI stream, and for those both types
463 // and IDs are in the same stream.
464 if (isObj() || !pdb().hasPDBIpiStream())
465 return types();
466
467 return getOrCreateTypeCollection(kIds);
468 }
469
symbol_groups()470 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
471 return make_range<SymbolGroupIterator>(symbol_groups_begin(),
472 symbol_groups_end());
473 }
474
symbol_groups_begin()475 SymbolGroupIterator InputFile::symbol_groups_begin() {
476 return SymbolGroupIterator(*this);
477 }
478
symbol_groups_end()479 SymbolGroupIterator InputFile::symbol_groups_end() {
480 return SymbolGroupIterator();
481 }
482
SymbolGroupIterator()483 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
484
SymbolGroupIterator(InputFile & File)485 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
486 if (File.isObj()) {
487 SectionIter = File.obj().section_begin();
488 scanToNextDebugS();
489 }
490 }
491
operator ==(const SymbolGroupIterator & R) const492 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
493 bool E = isEnd();
494 bool RE = R.isEnd();
495 if (E || RE)
496 return E == RE;
497
498 if (Value.File != R.Value.File)
499 return false;
500 return Index == R.Index;
501 }
502
operator *() const503 const SymbolGroup &SymbolGroupIterator::operator*() const {
504 assert(!isEnd());
505 return Value;
506 }
operator *()507 SymbolGroup &SymbolGroupIterator::operator*() {
508 assert(!isEnd());
509 return Value;
510 }
511
operator ++()512 SymbolGroupIterator &SymbolGroupIterator::operator++() {
513 assert(Value.File && !isEnd());
514 ++Index;
515 if (isEnd())
516 return *this;
517
518 if (Value.File->isPdb()) {
519 Value.updatePdbModi(Index);
520 return *this;
521 }
522
523 scanToNextDebugS();
524 return *this;
525 }
526
scanToNextDebugS()527 void SymbolGroupIterator::scanToNextDebugS() {
528 assert(SectionIter);
529 auto End = Value.File->obj().section_end();
530 auto &Iter = *SectionIter;
531 assert(!isEnd());
532
533 while (++Iter != End) {
534 DebugSubsectionArray SS;
535 SectionRef SR = *Iter;
536 if (!isDebugSSection(SR, SS))
537 continue;
538
539 Value.updateDebugS(SS);
540 return;
541 }
542 }
543
isEnd() const544 bool SymbolGroupIterator::isEnd() const {
545 if (!Value.File)
546 return true;
547 if (Value.File->isPdb()) {
548 DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
549 uint32_t Count = Dbi.modules().getModuleCount();
550 assert(Index <= Count);
551 return Index == Count;
552 }
553
554 assert(SectionIter);
555 return *SectionIter == Value.File->obj().section_end();
556 }
557
isMyCode(const SymbolGroup & Group)558 static bool isMyCode(const SymbolGroup &Group) {
559 if (Group.getFile().isObj())
560 return true;
561
562 StringRef Name = Group.name();
563 if (Name.startswith("Import:"))
564 return false;
565 if (Name.endswith_insensitive(".dll"))
566 return false;
567 if (Name.equals_insensitive("* linker *"))
568 return false;
569 if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools"))
570 return false;
571 if (Name.startswith_insensitive("f:\\dd\\vctools\\crt"))
572 return false;
573 return true;
574 }
575
shouldDumpSymbolGroup(uint32_t Idx,const SymbolGroup & Group,const FilterOptions & Filters)576 bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
577 const FilterOptions &Filters) {
578 if (Filters.JustMyCode && !isMyCode(Group))
579 return false;
580
581 // If the arg was not specified on the command line, always dump all modules.
582 if (!Filters.DumpModi)
583 return true;
584
585 // Otherwise, only dump if this is the same module specified.
586 return (Filters.DumpModi == Idx);
587 }
588