1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/PDB/Native/InputFile.h"
10 
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/BinaryFormat/Magic.h"
13 #include "llvm/DebugInfo/CodeView/CodeView.h"
14 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
15 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
16 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
17 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
18 #include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
19 #include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
23 #include "llvm/DebugInfo/PDB/Native/RawError.h"
24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
25 #include "llvm/DebugInfo/PDB/PDB.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/FormatVariadic.h"
29 
30 using namespace llvm;
31 using namespace llvm::codeview;
32 using namespace llvm::object;
33 using namespace llvm::pdb;
34 
35 InputFile::InputFile() = default;
36 InputFile::~InputFile() = default;
37 
38 Expected<ModuleDebugStreamRef>
39 llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,
40                                 uint32_t Index) {
41   Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
42   if (!DbiOrErr)
43     return DbiOrErr.takeError();
44   DbiStream &Dbi = *DbiOrErr;
45   const auto &Modules = Dbi.modules();
46   if (Index >= Modules.getModuleCount())
47     return make_error<RawError>(raw_error_code::index_out_of_bounds,
48                                 "Invalid module index");
49 
50   auto Modi = Modules.getModuleDescriptor(Index);
51 
52   ModuleName = Modi.getModuleName();
53 
54   uint16_t ModiStream = Modi.getModuleStreamIndex();
55   if (ModiStream == kInvalidStreamIndex)
56     return make_error<RawError>(raw_error_code::no_stream,
57                                 "Module stream not present");
58 
59   auto ModStreamData = File.createIndexedStream(ModiStream);
60 
61   ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
62   if (auto EC = ModS.reload())
63     return make_error<RawError>(raw_error_code::corrupt_file,
64                                 "Invalid module stream");
65 
66   return std::move(ModS);
67 }
68 
69 Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,
70                                                                uint32_t Index) {
71   Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
72   if (!DbiOrErr)
73     return DbiOrErr.takeError();
74   DbiStream &Dbi = *DbiOrErr;
75   const auto &Modules = Dbi.modules();
76   auto Modi = Modules.getModuleDescriptor(Index);
77 
78   uint16_t ModiStream = Modi.getModuleStreamIndex();
79   if (ModiStream == kInvalidStreamIndex)
80     return make_error<RawError>(raw_error_code::no_stream,
81                                 "Module stream not present");
82 
83   auto ModStreamData = File.createIndexedStream(ModiStream);
84 
85   ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
86   if (Error Err = ModS.reload())
87     return make_error<RawError>(raw_error_code::corrupt_file,
88                                 "Invalid module stream");
89 
90   return std::move(ModS);
91 }
92 
93 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
94                                              StringRef Name,
95                                              BinaryStreamReader &Reader) {
96   if (Expected<StringRef> NameOrErr = Section.getName()) {
97     if (*NameOrErr != Name)
98       return false;
99   } else {
100     consumeError(NameOrErr.takeError());
101     return false;
102   }
103 
104   Expected<StringRef> ContentsOrErr = Section.getContents();
105   if (!ContentsOrErr) {
106     consumeError(ContentsOrErr.takeError());
107     return false;
108   }
109 
110   Reader = BinaryStreamReader(*ContentsOrErr, support::little);
111   uint32_t Magic;
112   if (Reader.bytesRemaining() < sizeof(uint32_t))
113     return false;
114   cantFail(Reader.readInteger(Magic));
115   if (Magic != COFF::DEBUG_SECTION_MAGIC)
116     return false;
117   return true;
118 }
119 
120 static inline bool isDebugSSection(object::SectionRef Section,
121                                    DebugSubsectionArray &Subsections) {
122   BinaryStreamReader Reader;
123   if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
124     return false;
125 
126   cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
127   return true;
128 }
129 
130 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
131   BinaryStreamReader Reader;
132   if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
133       !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
134     return false;
135   cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
136   return true;
137 }
138 
139 static std::string formatChecksumKind(FileChecksumKind Kind) {
140   switch (Kind) {
141     RETURN_CASE(FileChecksumKind, None, "None");
142     RETURN_CASE(FileChecksumKind, MD5, "MD5");
143     RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
144     RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
145   }
146   return formatUnknownEnum(Kind);
147 }
148 
149 template <typename... Args>
150 static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
151   if (Append)
152     Printer.format(std::forward<Args>(args)...);
153   else
154     Printer.formatLine(std::forward<Args>(args)...);
155 }
156 
157 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
158   if (!File)
159     return;
160 
161   if (File->isPdb())
162     initializeForPdb(GroupIndex);
163   else {
164     Name = ".debug$S";
165     uint32_t I = 0;
166     for (const auto &S : File->obj().sections()) {
167       DebugSubsectionArray SS;
168       if (!isDebugSSection(S, SS))
169         continue;
170 
171       if (!SC.hasChecksums() || !SC.hasStrings())
172         SC.initialize(SS);
173 
174       if (I == GroupIndex)
175         Subsections = SS;
176 
177       if (SC.hasChecksums() && SC.hasStrings())
178         break;
179     }
180     rebuildChecksumMap();
181   }
182 }
183 
184 StringRef SymbolGroup::name() const { return Name; }
185 
186 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
187   Subsections = SS;
188 }
189 
190 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
191 
192 void SymbolGroup::initializeForPdb(uint32_t Modi) {
193   assert(File && File->isPdb());
194 
195   // PDB always uses the same string table, but each module has its own
196   // checksums.  So we only set the strings if they're not already set.
197   if (!SC.hasStrings()) {
198     auto StringTable = File->pdb().getStringTable();
199     if (StringTable)
200       SC.setStrings(StringTable->getStringTable());
201     else
202       consumeError(StringTable.takeError());
203   }
204 
205   SC.resetChecksums();
206   auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
207   if (!MDS) {
208     consumeError(MDS.takeError());
209     return;
210   }
211 
212   DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
213   Subsections = DebugStream->getSubsectionsArray();
214   SC.initialize(Subsections);
215   rebuildChecksumMap();
216 }
217 
218 void SymbolGroup::rebuildChecksumMap() {
219   if (!SC.hasChecksums())
220     return;
221 
222   for (const auto &Entry : SC.checksums()) {
223     auto S = SC.strings().getString(Entry.FileNameOffset);
224     if (!S)
225       continue;
226     ChecksumsByFile[*S] = Entry;
227   }
228 }
229 
230 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
231   assert(File && File->isPdb() && DebugStream);
232   return *DebugStream;
233 }
234 
235 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
236   return SC.strings().getString(Offset);
237 }
238 
239 Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {
240   StringRef Name;
241   if (!SC.hasChecksums()) {
242     return std::move(Name);
243   }
244 
245   auto Iter = SC.checksums().getArray().at(Offset);
246   if (Iter == SC.checksums().getArray().end()) {
247     return std::move(Name);
248   }
249 
250   uint32_t FO = Iter->FileNameOffset;
251   auto ExpectedFile = getNameFromStringTable(FO);
252   if (!ExpectedFile) {
253     return std::move(Name);
254   }
255 
256   return *ExpectedFile;
257 }
258 
259 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
260                                      bool Append) const {
261   auto FC = ChecksumsByFile.find(File);
262   if (FC == ChecksumsByFile.end()) {
263     formatInternal(Printer, Append, "- (no checksum) {0}", File);
264     return;
265   }
266 
267   formatInternal(Printer, Append, "- ({0}: {1}) {2}",
268                  formatChecksumKind(FC->getValue().Kind),
269                  toHex(FC->getValue().Checksum), File);
270 }
271 
272 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
273                                             uint32_t Offset,
274                                             bool Append) const {
275   if (!SC.hasChecksums()) {
276     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
277     return;
278   }
279 
280   auto Iter = SC.checksums().getArray().at(Offset);
281   if (Iter == SC.checksums().getArray().end()) {
282     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
283     return;
284   }
285 
286   uint32_t FO = Iter->FileNameOffset;
287   auto ExpectedFile = getNameFromStringTable(FO);
288   if (!ExpectedFile) {
289     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
290     consumeError(ExpectedFile.takeError());
291     return;
292   }
293   if (Iter->Kind == FileChecksumKind::None) {
294     formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
295   } else {
296     formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
297                    formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
298   }
299 }
300 
301 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
302   InputFile IF;
303   if (!llvm::sys::fs::exists(Path))
304     return make_error<StringError>(formatv("File {0} not found", Path),
305                                    inconvertibleErrorCode());
306 
307   file_magic Magic;
308   if (auto EC = identify_magic(Path, Magic))
309     return make_error<StringError>(
310         formatv("Unable to identify file type for file {0}", Path), EC);
311 
312   if (Magic == file_magic::coff_object) {
313     Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
314     if (!BinaryOrErr)
315       return BinaryOrErr.takeError();
316 
317     IF.CoffObject = std::move(*BinaryOrErr);
318     IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
319     return std::move(IF);
320   }
321 
322   if (Magic == file_magic::pdb) {
323     std::unique_ptr<IPDBSession> Session;
324     if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
325       return std::move(Err);
326 
327     IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
328     IF.PdbOrObj = &IF.PdbSession->getPDBFile();
329 
330     return std::move(IF);
331   }
332 
333   if (!AllowUnknownFile)
334     return make_error<StringError>(
335         formatv("File {0} is not a supported file type", Path),
336         inconvertibleErrorCode());
337 
338   auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
339                                       /*RequiresNullTerminator=*/false);
340   if (!Result)
341     return make_error<StringError>(
342         formatv("File {0} could not be opened", Path), Result.getError());
343 
344   IF.UnknownFile = std::move(*Result);
345   IF.PdbOrObj = IF.UnknownFile.get();
346   return std::move(IF);
347 }
348 
349 PDBFile &InputFile::pdb() {
350   assert(isPdb());
351   return *cast<PDBFile *>(PdbOrObj);
352 }
353 
354 const PDBFile &InputFile::pdb() const {
355   assert(isPdb());
356   return *cast<PDBFile *>(PdbOrObj);
357 }
358 
359 object::COFFObjectFile &InputFile::obj() {
360   assert(isObj());
361   return *cast<object::COFFObjectFile *>(PdbOrObj);
362 }
363 
364 const object::COFFObjectFile &InputFile::obj() const {
365   assert(isObj());
366   return *cast<object::COFFObjectFile *>(PdbOrObj);
367 }
368 
369 MemoryBuffer &InputFile::unknown() {
370   assert(isUnknown());
371   return *cast<MemoryBuffer *>(PdbOrObj);
372 }
373 
374 const MemoryBuffer &InputFile::unknown() const {
375   assert(isUnknown());
376   return *cast<MemoryBuffer *>(PdbOrObj);
377 }
378 
379 StringRef InputFile::getFilePath() const {
380   if (isPdb())
381     return pdb().getFilePath();
382   if (isObj())
383     return obj().getFileName();
384   assert(isUnknown());
385   return unknown().getBufferIdentifier();
386 }
387 
388 bool InputFile::hasTypes() const {
389   if (isPdb())
390     return pdb().hasPDBTpiStream();
391 
392   for (const auto &Section : obj().sections()) {
393     CVTypeArray Types;
394     if (isDebugTSection(Section, Types))
395       return true;
396   }
397   return false;
398 }
399 
400 bool InputFile::hasIds() const {
401   if (isObj())
402     return false;
403   return pdb().hasPDBIpiStream();
404 }
405 
406 bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); }
407 
408 bool InputFile::isObj() const {
409   return isa<object::COFFObjectFile *>(PdbOrObj);
410 }
411 
412 bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); }
413 
414 codeview::LazyRandomTypeCollection &
415 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
416   if (Types && Kind == kTypes)
417     return *Types;
418   if (Ids && Kind == kIds)
419     return *Ids;
420 
421   if (Kind == kIds) {
422     assert(isPdb() && pdb().hasPDBIpiStream());
423   }
424 
425   // If the collection was already initialized, we should have just returned it
426   // in step 1.
427   if (isPdb()) {
428     TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
429     auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
430                                            : pdb().getPDBTpiStream());
431 
432     auto &Array = Stream.typeArray();
433     uint32_t Count = Stream.getNumTypeRecords();
434     auto Offsets = Stream.getTypeIndexOffsets();
435     Collection =
436         std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
437     return *Collection;
438   }
439 
440   assert(isObj());
441   assert(Kind == kTypes);
442   assert(!Types);
443 
444   for (const auto &Section : obj().sections()) {
445     CVTypeArray Records;
446     if (!isDebugTSection(Section, Records))
447       continue;
448 
449     Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
450     return *Types;
451   }
452 
453   Types = std::make_unique<LazyRandomTypeCollection>(100);
454   return *Types;
455 }
456 
457 codeview::LazyRandomTypeCollection &InputFile::types() {
458   return getOrCreateTypeCollection(kTypes);
459 }
460 
461 codeview::LazyRandomTypeCollection &InputFile::ids() {
462   // Object files have only one type stream that contains both types and ids.
463   // Similarly, some PDBs don't contain an IPI stream, and for those both types
464   // and IDs are in the same stream.
465   if (isObj() || !pdb().hasPDBIpiStream())
466     return types();
467 
468   return getOrCreateTypeCollection(kIds);
469 }
470 
471 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
472   return make_range<SymbolGroupIterator>(symbol_groups_begin(),
473                                          symbol_groups_end());
474 }
475 
476 SymbolGroupIterator InputFile::symbol_groups_begin() {
477   return SymbolGroupIterator(*this);
478 }
479 
480 SymbolGroupIterator InputFile::symbol_groups_end() {
481   return SymbolGroupIterator();
482 }
483 
484 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
485 
486 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
487   if (File.isObj()) {
488     SectionIter = File.obj().section_begin();
489     scanToNextDebugS();
490   }
491 }
492 
493 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
494   bool E = isEnd();
495   bool RE = R.isEnd();
496   if (E || RE)
497     return E == RE;
498 
499   if (Value.File != R.Value.File)
500     return false;
501   return Index == R.Index;
502 }
503 
504 const SymbolGroup &SymbolGroupIterator::operator*() const {
505   assert(!isEnd());
506   return Value;
507 }
508 SymbolGroup &SymbolGroupIterator::operator*() {
509   assert(!isEnd());
510   return Value;
511 }
512 
513 SymbolGroupIterator &SymbolGroupIterator::operator++() {
514   assert(Value.File && !isEnd());
515   ++Index;
516   if (isEnd())
517     return *this;
518 
519   if (Value.File->isPdb()) {
520     Value.updatePdbModi(Index);
521     return *this;
522   }
523 
524   scanToNextDebugS();
525   return *this;
526 }
527 
528 void SymbolGroupIterator::scanToNextDebugS() {
529   assert(SectionIter);
530   auto End = Value.File->obj().section_end();
531   auto &Iter = *SectionIter;
532   assert(!isEnd());
533 
534   while (++Iter != End) {
535     DebugSubsectionArray SS;
536     SectionRef SR = *Iter;
537     if (!isDebugSSection(SR, SS))
538       continue;
539 
540     Value.updateDebugS(SS);
541     return;
542   }
543 }
544 
545 bool SymbolGroupIterator::isEnd() const {
546   if (!Value.File)
547     return true;
548   if (Value.File->isPdb()) {
549     DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
550     uint32_t Count = Dbi.modules().getModuleCount();
551     assert(Index <= Count);
552     return Index == Count;
553   }
554 
555   assert(SectionIter);
556   return *SectionIter == Value.File->obj().section_end();
557 }
558 
559 static bool isMyCode(const SymbolGroup &Group) {
560   if (Group.getFile().isObj())
561     return true;
562 
563   StringRef Name = Group.name();
564   if (Name.startswith("Import:"))
565     return false;
566   if (Name.ends_with_insensitive(".dll"))
567     return false;
568   if (Name.equals_insensitive("* linker *"))
569     return false;
570   if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools"))
571     return false;
572   if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt"))
573     return false;
574   return true;
575 }
576 
577 bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
578                                       const FilterOptions &Filters) {
579   if (Filters.JustMyCode && !isMyCode(Group))
580     return false;
581 
582   // If the arg was not specified on the command line, always dump all modules.
583   if (!Filters.DumpModi)
584     return true;
585 
586   // Otherwise, only dump if this is the same module specified.
587   return (Filters.DumpModi == Idx);
588 }
589