1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/PDB/Native/InputFile.h"
10 
11 #include "llvm/BinaryFormat/Magic.h"
12 #include "llvm/DebugInfo/CodeView/CodeView.h"
13 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
14 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
15 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
16 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
17 #include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
18 #include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
19 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
20 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
22 #include "llvm/DebugInfo/PDB/Native/RawError.h"
23 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
24 #include "llvm/DebugInfo/PDB/PDB.h"
25 #include "llvm/Object/COFF.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/FormatVariadic.h"
28 
29 using namespace llvm;
30 using namespace llvm::codeview;
31 using namespace llvm::object;
32 using namespace llvm::pdb;
33 
// The constructor and destructor are compiler-generated, but they are
// defaulted here in the implementation file rather than inline in the header.
InputFile::InputFile() = default;
InputFile::~InputFile() = default;
36 
37 Expected<ModuleDebugStreamRef>
getModuleDebugStream(PDBFile & File,StringRef & ModuleName,uint32_t Index)38 llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,
39                                 uint32_t Index) {
40   Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
41   if (!DbiOrErr)
42     return DbiOrErr.takeError();
43   DbiStream &Dbi = *DbiOrErr;
44   const auto &Modules = Dbi.modules();
45   if (Index >= Modules.getModuleCount())
46     return make_error<RawError>(raw_error_code::index_out_of_bounds,
47                                 "Invalid module index");
48 
49   auto Modi = Modules.getModuleDescriptor(Index);
50 
51   ModuleName = Modi.getModuleName();
52 
53   uint16_t ModiStream = Modi.getModuleStreamIndex();
54   if (ModiStream == kInvalidStreamIndex)
55     return make_error<RawError>(raw_error_code::no_stream,
56                                 "Module stream not present");
57 
58   auto ModStreamData = File.createIndexedStream(ModiStream);
59 
60   ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
61   if (auto EC = ModS.reload())
62     return make_error<RawError>(raw_error_code::corrupt_file,
63                                 "Invalid module stream");
64 
65   return std::move(ModS);
66 }
67 
getModuleDebugStream(PDBFile & File,uint32_t Index)68 Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,
69                                                                uint32_t Index) {
70   Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
71   if (!DbiOrErr)
72     return DbiOrErr.takeError();
73   DbiStream &Dbi = *DbiOrErr;
74   const auto &Modules = Dbi.modules();
75   auto Modi = Modules.getModuleDescriptor(Index);
76 
77   uint16_t ModiStream = Modi.getModuleStreamIndex();
78   if (ModiStream == kInvalidStreamIndex)
79     return make_error<RawError>(raw_error_code::no_stream,
80                                 "Module stream not present");
81 
82   auto ModStreamData = File.createIndexedStream(ModiStream);
83 
84   ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
85   if (Error Err = ModS.reload())
86     return make_error<RawError>(raw_error_code::corrupt_file,
87                                 "Invalid module stream");
88 
89   return std::move(ModS);
90 }
91 
isCodeViewDebugSubsection(object::SectionRef Section,StringRef Name,BinaryStreamReader & Reader)92 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
93                                              StringRef Name,
94                                              BinaryStreamReader &Reader) {
95   if (Expected<StringRef> NameOrErr = Section.getName()) {
96     if (*NameOrErr != Name)
97       return false;
98   } else {
99     consumeError(NameOrErr.takeError());
100     return false;
101   }
102 
103   Expected<StringRef> ContentsOrErr = Section.getContents();
104   if (!ContentsOrErr) {
105     consumeError(ContentsOrErr.takeError());
106     return false;
107   }
108 
109   Reader = BinaryStreamReader(*ContentsOrErr, support::little);
110   uint32_t Magic;
111   if (Reader.bytesRemaining() < sizeof(uint32_t))
112     return false;
113   cantFail(Reader.readInteger(Magic));
114   if (Magic != COFF::DEBUG_SECTION_MAGIC)
115     return false;
116   return true;
117 }
118 
isDebugSSection(object::SectionRef Section,DebugSubsectionArray & Subsections)119 static inline bool isDebugSSection(object::SectionRef Section,
120                                    DebugSubsectionArray &Subsections) {
121   BinaryStreamReader Reader;
122   if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
123     return false;
124 
125   cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
126   return true;
127 }
128 
isDebugTSection(SectionRef Section,CVTypeArray & Types)129 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
130   BinaryStreamReader Reader;
131   if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
132       !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
133     return false;
134   cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
135   return true;
136 }
137 
// Returns a display label for a file checksum kind. The four enumerators
// listed below map to fixed labels through RETURN_CASE, a macro defined
// outside this file (presumably `case Enum::X: return Ret;` -- confirm in
// FormatUtil.h). Any other value falls out of the switch and is rendered by
// formatUnknownEnum().
static std::string formatChecksumKind(FileChecksumKind Kind) {
  switch (Kind) {
    RETURN_CASE(FileChecksumKind, None, "None");
    RETURN_CASE(FileChecksumKind, MD5, "MD5");
    RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
    RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
  }
  return formatUnknownEnum(Kind);
}
147 
148 template <typename... Args>
formatInternal(LinePrinter & Printer,bool Append,Args &&...args)149 static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
150   if (Append)
151     Printer.format(std::forward<Args>(args)...);
152   else
153     Printer.formatLine(std::forward<Args>(args)...);
154 }
155 
SymbolGroup(InputFile * File,uint32_t GroupIndex)156 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
157   if (!File)
158     return;
159 
160   if (File->isPdb())
161     initializeForPdb(GroupIndex);
162   else {
163     Name = ".debug$S";
164     uint32_t I = 0;
165     for (const auto &S : File->obj().sections()) {
166       DebugSubsectionArray SS;
167       if (!isDebugSSection(S, SS))
168         continue;
169 
170       if (!SC.hasChecksums() || !SC.hasStrings())
171         SC.initialize(SS);
172 
173       if (I == GroupIndex)
174         Subsections = SS;
175 
176       if (SC.hasChecksums() && SC.hasStrings())
177         break;
178     }
179     rebuildChecksumMap();
180   }
181 }
182 
// Returns the group's name: ".debug$S" for groups built from an object file,
// or the name that getModuleDebugStream() stored for a PDB module.
StringRef SymbolGroup::name() const { return Name; }
184 
// Replaces the group's subsection array with SS. Nothing else (name, string
// and checksum tables, checksum map) is touched.
void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
  Subsections = SS;
}
188 
// Re-targets this group at PDB module index Modi by re-running
// initializeForPdb().
void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
190 
initializeForPdb(uint32_t Modi)191 void SymbolGroup::initializeForPdb(uint32_t Modi) {
192   assert(File && File->isPdb());
193 
194   // PDB always uses the same string table, but each module has its own
195   // checksums.  So we only set the strings if they're not already set.
196   if (!SC.hasStrings()) {
197     auto StringTable = File->pdb().getStringTable();
198     if (StringTable)
199       SC.setStrings(StringTable->getStringTable());
200     else
201       consumeError(StringTable.takeError());
202   }
203 
204   SC.resetChecksums();
205   auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
206   if (!MDS) {
207     consumeError(MDS.takeError());
208     return;
209   }
210 
211   DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
212   Subsections = DebugStream->getSubsectionsArray();
213   SC.initialize(Subsections);
214   rebuildChecksumMap();
215 }
216 
rebuildChecksumMap()217 void SymbolGroup::rebuildChecksumMap() {
218   if (!SC.hasChecksums())
219     return;
220 
221   for (const auto &Entry : SC.checksums()) {
222     auto S = SC.strings().getString(Entry.FileNameOffset);
223     if (!S)
224       continue;
225     ChecksumsByFile[*S] = Entry;
226   }
227 }
228 
// Returns the module debug stream stored by initializeForPdb().
// Precondition (asserted): the group belongs to a PDB and a debug stream has
// been stored.
const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
  assert(File && File->isPdb() && DebugStream);
  return *DebugStream;
}
233 
// Looks up the string stored at Offset in the group's string table and
// returns the lookup result (string or error) unchanged.
// NOTE(review): no hasStrings() check is made here; callers presumably
// guarantee that a string table is attached -- confirm.
Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
  return SC.strings().getString(Offset);
}
237 
getNameFromChecksums(uint32_t Offset) const238 Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {
239   StringRef Name;
240   if (!SC.hasChecksums()) {
241     return std::move(Name);
242   }
243 
244   auto Iter = SC.checksums().getArray().at(Offset);
245   if (Iter == SC.checksums().getArray().end()) {
246     return std::move(Name);
247   }
248 
249   uint32_t FO = Iter->FileNameOffset;
250   auto ExpectedFile = getNameFromStringTable(FO);
251   if (!ExpectedFile) {
252     return std::move(Name);
253   }
254 
255   return *ExpectedFile;
256 }
257 
formatFromFileName(LinePrinter & Printer,StringRef File,bool Append) const258 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
259                                      bool Append) const {
260   auto FC = ChecksumsByFile.find(File);
261   if (FC == ChecksumsByFile.end()) {
262     formatInternal(Printer, Append, "- (no checksum) {0}", File);
263     return;
264   }
265 
266   formatInternal(Printer, Append, "- ({0}: {1}) {2}",
267                  formatChecksumKind(FC->getValue().Kind),
268                  toHex(FC->getValue().Checksum), File);
269 }
270 
formatFromChecksumsOffset(LinePrinter & Printer,uint32_t Offset,bool Append) const271 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
272                                             uint32_t Offset,
273                                             bool Append) const {
274   if (!SC.hasChecksums()) {
275     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
276     return;
277   }
278 
279   auto Iter = SC.checksums().getArray().at(Offset);
280   if (Iter == SC.checksums().getArray().end()) {
281     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
282     return;
283   }
284 
285   uint32_t FO = Iter->FileNameOffset;
286   auto ExpectedFile = getNameFromStringTable(FO);
287   if (!ExpectedFile) {
288     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
289     consumeError(ExpectedFile.takeError());
290     return;
291   }
292   if (Iter->Kind == FileChecksumKind::None) {
293     formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
294   } else {
295     formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
296                    formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
297   }
298 }
299 
open(StringRef Path,bool AllowUnknownFile)300 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
301   InputFile IF;
302   if (!llvm::sys::fs::exists(Path))
303     return make_error<StringError>(formatv("File {0} not found", Path),
304                                    inconvertibleErrorCode());
305 
306   file_magic Magic;
307   if (auto EC = identify_magic(Path, Magic))
308     return make_error<StringError>(
309         formatv("Unable to identify file type for file {0}", Path), EC);
310 
311   if (Magic == file_magic::coff_object) {
312     Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
313     if (!BinaryOrErr)
314       return BinaryOrErr.takeError();
315 
316     IF.CoffObject = std::move(*BinaryOrErr);
317     IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
318     return std::move(IF);
319   }
320 
321   if (Magic == file_magic::pdb) {
322     std::unique_ptr<IPDBSession> Session;
323     if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
324       return std::move(Err);
325 
326     IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
327     IF.PdbOrObj = &IF.PdbSession->getPDBFile();
328 
329     return std::move(IF);
330   }
331 
332   if (!AllowUnknownFile)
333     return make_error<StringError>(
334         formatv("File {0} is not a supported file type", Path),
335         inconvertibleErrorCode());
336 
337   auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
338                                       /*RequiresNullTerminator=*/false);
339   if (!Result)
340     return make_error<StringError>(
341         formatv("File {0} could not be opened", Path), Result.getError());
342 
343   IF.UnknownFile = std::move(*Result);
344   IF.PdbOrObj = IF.UnknownFile.get();
345   return std::move(IF);
346 }
347 
pdb()348 PDBFile &InputFile::pdb() {
349   assert(isPdb());
350   return *PdbOrObj.get<PDBFile *>();
351 }
352 
pdb() const353 const PDBFile &InputFile::pdb() const {
354   assert(isPdb());
355   return *PdbOrObj.get<PDBFile *>();
356 }
357 
obj()358 object::COFFObjectFile &InputFile::obj() {
359   assert(isObj());
360   return *PdbOrObj.get<object::COFFObjectFile *>();
361 }
362 
obj() const363 const object::COFFObjectFile &InputFile::obj() const {
364   assert(isObj());
365   return *PdbOrObj.get<object::COFFObjectFile *>();
366 }
367 
unknown()368 MemoryBuffer &InputFile::unknown() {
369   assert(isUnknown());
370   return *PdbOrObj.get<MemoryBuffer *>();
371 }
372 
unknown() const373 const MemoryBuffer &InputFile::unknown() const {
374   assert(isUnknown());
375   return *PdbOrObj.get<MemoryBuffer *>();
376 }
377 
getFilePath() const378 StringRef InputFile::getFilePath() const {
379   if (isPdb())
380     return pdb().getFilePath();
381   if (isObj())
382     return obj().getFileName();
383   assert(isUnknown());
384   return unknown().getBufferIdentifier();
385 }
386 
hasTypes() const387 bool InputFile::hasTypes() const {
388   if (isPdb())
389     return pdb().hasPDBTpiStream();
390 
391   for (const auto &Section : obj().sections()) {
392     CVTypeArray Types;
393     if (isDebugTSection(Section, Types))
394       return true;
395   }
396   return false;
397 }
398 
hasIds() const399 bool InputFile::hasIds() const {
400   if (isObj())
401     return false;
402   return pdb().hasPDBIpiStream();
403 }
404 
// True when the wrapped file pointer currently holds a PDBFile.
bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
406 
// True when the wrapped file pointer currently holds a COFFObjectFile.
bool InputFile::isObj() const {
  return PdbOrObj.is<object::COFFObjectFile *>();
}
410 
// True when the wrapped file pointer currently holds a raw MemoryBuffer, i.e.
// the file was opened as an unrecognized type.
bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
412 
413 codeview::LazyRandomTypeCollection &
getOrCreateTypeCollection(TypeCollectionKind Kind)414 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
415   if (Types && Kind == kTypes)
416     return *Types;
417   if (Ids && Kind == kIds)
418     return *Ids;
419 
420   if (Kind == kIds) {
421     assert(isPdb() && pdb().hasPDBIpiStream());
422   }
423 
424   // If the collection was already initialized, we should have just returned it
425   // in step 1.
426   if (isPdb()) {
427     TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
428     auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
429                                            : pdb().getPDBTpiStream());
430 
431     auto &Array = Stream.typeArray();
432     uint32_t Count = Stream.getNumTypeRecords();
433     auto Offsets = Stream.getTypeIndexOffsets();
434     Collection =
435         std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
436     return *Collection;
437   }
438 
439   assert(isObj());
440   assert(Kind == kTypes);
441   assert(!Types);
442 
443   for (const auto &Section : obj().sections()) {
444     CVTypeArray Records;
445     if (!isDebugTSection(Section, Records))
446       continue;
447 
448     Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
449     return *Types;
450   }
451 
452   Types = std::make_unique<LazyRandomTypeCollection>(100);
453   return *Types;
454 }
455 
// Returns the type collection of the file, creating it on first use.
codeview::LazyRandomTypeCollection &InputFile::types() {
  return getOrCreateTypeCollection(kTypes);
}
459 
ids()460 codeview::LazyRandomTypeCollection &InputFile::ids() {
461   // Object files have only one type stream that contains both types and ids.
462   // Similarly, some PDBs don't contain an IPI stream, and for those both types
463   // and IDs are in the same stream.
464   if (isObj() || !pdb().hasPDBIpiStream())
465     return types();
466 
467   return getOrCreateTypeCollection(kIds);
468 }
469 
// Returns the range [symbol_groups_begin(), symbol_groups_end()), suitable
// for range-based for loops over the file's symbol groups.
iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
  return make_range<SymbolGroupIterator>(symbol_groups_begin(),
                                         symbol_groups_end());
}
474 
// Returns an iterator bound to this file, positioned by
// SymbolGroupIterator's InputFile constructor.
SymbolGroupIterator InputFile::symbol_groups_begin() {
  return SymbolGroupIterator(*this);
}
478 
// Returns the end sentinel: a default-constructed iterator that is not bound
// to any file.
SymbolGroupIterator InputFile::symbol_groups_end() {
  return SymbolGroupIterator();
}
482 
// Constructs the end sentinel: the embedded SymbolGroup has a null file, for
// which isEnd() reports true.
SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
484 
SymbolGroupIterator(InputFile & File)485 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
486   if (File.isObj()) {
487     SectionIter = File.obj().section_begin();
488     scanToNextDebugS();
489   }
490 }
491 
operator ==(const SymbolGroupIterator & R) const492 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
493   bool E = isEnd();
494   bool RE = R.isEnd();
495   if (E || RE)
496     return E == RE;
497 
498   if (Value.File != R.Value.File)
499     return false;
500   return Index == R.Index;
501 }
502 
// Returns the current symbol group (read-only).
// Precondition (asserted): the iterator is not at the end.
const SymbolGroup &SymbolGroupIterator::operator*() const {
  assert(!isEnd());
  return Value;
}
// Returns the current symbol group.
// Precondition (asserted): the iterator is not at the end.
SymbolGroup &SymbolGroupIterator::operator*() {
  assert(!isEnd());
  return Value;
}
511 
operator ++()512 SymbolGroupIterator &SymbolGroupIterator::operator++() {
513   assert(Value.File && !isEnd());
514   ++Index;
515   if (isEnd())
516     return *this;
517 
518   if (Value.File->isPdb()) {
519     Value.updatePdbModi(Index);
520     return *this;
521   }
522 
523   scanToNextDebugS();
524   return *this;
525 }
526 
scanToNextDebugS()527 void SymbolGroupIterator::scanToNextDebugS() {
528   assert(SectionIter);
529   auto End = Value.File->obj().section_end();
530   auto &Iter = *SectionIter;
531   assert(!isEnd());
532 
533   while (++Iter != End) {
534     DebugSubsectionArray SS;
535     SectionRef SR = *Iter;
536     if (!isDebugSSection(SR, SS))
537       continue;
538 
539     Value.updateDebugS(SS);
540     return;
541   }
542 }
543 
isEnd() const544 bool SymbolGroupIterator::isEnd() const {
545   if (!Value.File)
546     return true;
547   if (Value.File->isPdb()) {
548     DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
549     uint32_t Count = Dbi.modules().getModuleCount();
550     assert(Index <= Count);
551     return Index == Count;
552   }
553 
554   assert(SectionIter);
555   return *SectionIter == Value.File->obj().section_end();
556 }
557 
isMyCode(const SymbolGroup & Group)558 static bool isMyCode(const SymbolGroup &Group) {
559   if (Group.getFile().isObj())
560     return true;
561 
562   StringRef Name = Group.name();
563   if (Name.startswith("Import:"))
564     return false;
565   if (Name.endswith_insensitive(".dll"))
566     return false;
567   if (Name.equals_insensitive("* linker *"))
568     return false;
569   if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools"))
570     return false;
571   if (Name.startswith_insensitive("f:\\dd\\vctools\\crt"))
572     return false;
573   return true;
574 }
575 
shouldDumpSymbolGroup(uint32_t Idx,const SymbolGroup & Group,const FilterOptions & Filters)576 bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
577                                       const FilterOptions &Filters) {
578   if (Filters.JustMyCode && !isMyCode(Group))
579     return false;
580 
581   // If the arg was not specified on the command line, always dump all modules.
582   if (!Filters.DumpModi)
583     return true;
584 
585   // Otherwise, only dump if this is the same module specified.
586   return (Filters.DumpModi == Idx);
587 }
588