1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFile.h"
10 
11 #include "FormatUtil.h"
12 #include "LinePrinter.h"
13 
14 #include "llvm/BinaryFormat/Magic.h"
15 #include "llvm/DebugInfo/CodeView/CodeView.h"
16 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
17 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
18 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
19 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
20 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
22 #include "llvm/DebugInfo/PDB/Native/RawError.h"
23 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
24 #include "llvm/DebugInfo/PDB/PDB.h"
25 #include "llvm/Object/COFF.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/FormatVariadic.h"
28 
29 using namespace llvm;
30 using namespace llvm::codeview;
31 using namespace llvm::object;
32 using namespace llvm::pdb;
33 
InputFile()34 InputFile::InputFile() {}
~InputFile()35 InputFile::~InputFile() {}
36 
37 static Expected<ModuleDebugStreamRef>
getModuleDebugStream(PDBFile & File,StringRef & ModuleName,uint32_t Index)38 getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
39   ExitOnError Err("Unexpected error: ");
40 
41   auto &Dbi = Err(File.getPDBDbiStream());
42   const auto &Modules = Dbi.modules();
43   if (Index >= Modules.getModuleCount())
44     return make_error<RawError>(raw_error_code::index_out_of_bounds,
45                                 "Invalid module index");
46 
47   auto Modi = Modules.getModuleDescriptor(Index);
48 
49   ModuleName = Modi.getModuleName();
50 
51   uint16_t ModiStream = Modi.getModuleStreamIndex();
52   if (ModiStream == kInvalidStreamIndex)
53     return make_error<RawError>(raw_error_code::no_stream,
54                                 "Module stream not present");
55 
56   auto ModStreamData = File.createIndexedStream(ModiStream);
57 
58   ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
59   if (auto EC = ModS.reload())
60     return make_error<RawError>(raw_error_code::corrupt_file,
61                                 "Invalid module stream");
62 
63   return std::move(ModS);
64 }
65 
isCodeViewDebugSubsection(object::SectionRef Section,StringRef Name,BinaryStreamReader & Reader)66 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
67                                              StringRef Name,
68                                              BinaryStreamReader &Reader) {
69   if (Expected<StringRef> NameOrErr = Section.getName()) {
70     if (*NameOrErr != Name)
71       return false;
72   } else {
73     consumeError(NameOrErr.takeError());
74     return false;
75   }
76 
77   Expected<StringRef> ContentsOrErr = Section.getContents();
78   if (!ContentsOrErr) {
79     consumeError(ContentsOrErr.takeError());
80     return false;
81   }
82 
83   Reader = BinaryStreamReader(*ContentsOrErr, support::little);
84   uint32_t Magic;
85   if (Reader.bytesRemaining() < sizeof(uint32_t))
86     return false;
87   cantFail(Reader.readInteger(Magic));
88   if (Magic != COFF::DEBUG_SECTION_MAGIC)
89     return false;
90   return true;
91 }
92 
isDebugSSection(object::SectionRef Section,DebugSubsectionArray & Subsections)93 static inline bool isDebugSSection(object::SectionRef Section,
94                                    DebugSubsectionArray &Subsections) {
95   BinaryStreamReader Reader;
96   if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
97     return false;
98 
99   cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
100   return true;
101 }
102 
isDebugTSection(SectionRef Section,CVTypeArray & Types)103 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
104   BinaryStreamReader Reader;
105   if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
106       !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
107     return false;
108   cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
109   return true;
110 }
111 
/// Returns a printable name for the checksum algorithm \p Kind.
///
/// NOTE(review): RETURN_CASE is defined outside this file; presumably each use
/// expands to a case label that returns the given string -- confirm against
/// its definition.  Values not handled inside the switch are rendered by
/// formatUnknownEnum().
static std::string formatChecksumKind(FileChecksumKind Kind) {
  switch (Kind) {
    RETURN_CASE(FileChecksumKind, None, "None");
    RETURN_CASE(FileChecksumKind, MD5, "MD5");
    RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
    RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
  }
  return formatUnknownEnum(Kind);
}
121 
122 template <typename... Args>
formatInternal(LinePrinter & Printer,bool Append,Args &&...args)123 static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
124   if (Append)
125     Printer.format(std::forward<Args>(args)...);
126   else
127     Printer.formatLine(std::forward<Args>(args)...);
128 }
129 
SymbolGroup(InputFile * File,uint32_t GroupIndex)130 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
131   if (!File)
132     return;
133 
134   if (File->isPdb())
135     initializeForPdb(GroupIndex);
136   else {
137     Name = ".debug$S";
138     uint32_t I = 0;
139     for (const auto &S : File->obj().sections()) {
140       DebugSubsectionArray SS;
141       if (!isDebugSSection(S, SS))
142         continue;
143 
144       if (!SC.hasChecksums() || !SC.hasStrings())
145         SC.initialize(SS);
146 
147       if (I == GroupIndex)
148         Subsections = SS;
149 
150       if (SC.hasChecksums() && SC.hasStrings())
151         break;
152     }
153     rebuildChecksumMap();
154   }
155 }
156 
name() const157 StringRef SymbolGroup::name() const { return Name; }
158 
updateDebugS(const codeview::DebugSubsectionArray & SS)159 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
160   Subsections = SS;
161 }
162 
updatePdbModi(uint32_t Modi)163 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
164 
initializeForPdb(uint32_t Modi)165 void SymbolGroup::initializeForPdb(uint32_t Modi) {
166   assert(File && File->isPdb());
167 
168   // PDB always uses the same string table, but each module has its own
169   // checksums.  So we only set the strings if they're not already set.
170   if (!SC.hasStrings()) {
171     auto StringTable = File->pdb().getStringTable();
172     if (StringTable)
173       SC.setStrings(StringTable->getStringTable());
174     else
175       consumeError(StringTable.takeError());
176   }
177 
178   SC.resetChecksums();
179   auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
180   if (!MDS) {
181     consumeError(MDS.takeError());
182     return;
183   }
184 
185   DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
186   Subsections = DebugStream->getSubsectionsArray();
187   SC.initialize(Subsections);
188   rebuildChecksumMap();
189 }
190 
rebuildChecksumMap()191 void SymbolGroup::rebuildChecksumMap() {
192   if (!SC.hasChecksums())
193     return;
194 
195   for (const auto &Entry : SC.checksums()) {
196     auto S = SC.strings().getString(Entry.FileNameOffset);
197     if (!S)
198       continue;
199     ChecksumsByFile[*S] = Entry;
200   }
201 }
202 
getPdbModuleStream() const203 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
204   assert(File && File->isPdb() && DebugStream);
205   return *DebugStream;
206 }
207 
getNameFromStringTable(uint32_t Offset) const208 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
209   return SC.strings().getString(Offset);
210 }
211 
formatFromFileName(LinePrinter & Printer,StringRef File,bool Append) const212 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
213                                      bool Append) const {
214   auto FC = ChecksumsByFile.find(File);
215   if (FC == ChecksumsByFile.end()) {
216     formatInternal(Printer, Append, "- (no checksum) {0}", File);
217     return;
218   }
219 
220   formatInternal(Printer, Append, "- ({0}: {1}) {2}",
221                  formatChecksumKind(FC->getValue().Kind),
222                  toHex(FC->getValue().Checksum), File);
223 }
224 
formatFromChecksumsOffset(LinePrinter & Printer,uint32_t Offset,bool Append) const225 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
226                                             uint32_t Offset,
227                                             bool Append) const {
228   if (!SC.hasChecksums()) {
229     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
230     return;
231   }
232 
233   auto Iter = SC.checksums().getArray().at(Offset);
234   if (Iter == SC.checksums().getArray().end()) {
235     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
236     return;
237   }
238 
239   uint32_t FO = Iter->FileNameOffset;
240   auto ExpectedFile = getNameFromStringTable(FO);
241   if (!ExpectedFile) {
242     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
243     consumeError(ExpectedFile.takeError());
244     return;
245   }
246   if (Iter->Kind == FileChecksumKind::None) {
247     formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
248   } else {
249     formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
250                    formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
251   }
252 }
253 
open(StringRef Path,bool AllowUnknownFile)254 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
255   InputFile IF;
256   if (!llvm::sys::fs::exists(Path))
257     return make_error<StringError>(formatv("File {0} not found", Path),
258                                    inconvertibleErrorCode());
259 
260   file_magic Magic;
261   if (auto EC = identify_magic(Path, Magic))
262     return make_error<StringError>(
263         formatv("Unable to identify file type for file {0}", Path), EC);
264 
265   if (Magic == file_magic::coff_object) {
266     Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
267     if (!BinaryOrErr)
268       return BinaryOrErr.takeError();
269 
270     IF.CoffObject = std::move(*BinaryOrErr);
271     IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
272     return std::move(IF);
273   }
274 
275   if (Magic == file_magic::pdb) {
276     std::unique_ptr<IPDBSession> Session;
277     if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
278       return std::move(Err);
279 
280     IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
281     IF.PdbOrObj = &IF.PdbSession->getPDBFile();
282 
283     return std::move(IF);
284   }
285 
286   if (!AllowUnknownFile)
287     return make_error<StringError>(
288         formatv("File {0} is not a supported file type", Path),
289         inconvertibleErrorCode());
290 
291   auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
292                                       /*RequiresNullTerminator=*/false);
293   if (!Result)
294     return make_error<StringError>(
295         formatv("File {0} could not be opened", Path), Result.getError());
296 
297   IF.UnknownFile = std::move(*Result);
298   IF.PdbOrObj = IF.UnknownFile.get();
299   return std::move(IF);
300 }
301 
pdb()302 PDBFile &InputFile::pdb() {
303   assert(isPdb());
304   return *PdbOrObj.get<PDBFile *>();
305 }
306 
pdb() const307 const PDBFile &InputFile::pdb() const {
308   assert(isPdb());
309   return *PdbOrObj.get<PDBFile *>();
310 }
311 
obj()312 object::COFFObjectFile &InputFile::obj() {
313   assert(isObj());
314   return *PdbOrObj.get<object::COFFObjectFile *>();
315 }
316 
obj() const317 const object::COFFObjectFile &InputFile::obj() const {
318   assert(isObj());
319   return *PdbOrObj.get<object::COFFObjectFile *>();
320 }
321 
unknown()322 MemoryBuffer &InputFile::unknown() {
323   assert(isUnknown());
324   return *PdbOrObj.get<MemoryBuffer *>();
325 }
326 
unknown() const327 const MemoryBuffer &InputFile::unknown() const {
328   assert(isUnknown());
329   return *PdbOrObj.get<MemoryBuffer *>();
330 }
331 
getFilePath() const332 StringRef InputFile::getFilePath() const {
333   if (isPdb())
334     return pdb().getFilePath();
335   if (isObj())
336     return obj().getFileName();
337   assert(isUnknown());
338   return unknown().getBufferIdentifier();
339 }
340 
hasTypes() const341 bool InputFile::hasTypes() const {
342   if (isPdb())
343     return pdb().hasPDBTpiStream();
344 
345   for (const auto &Section : obj().sections()) {
346     CVTypeArray Types;
347     if (isDebugTSection(Section, Types))
348       return true;
349   }
350   return false;
351 }
352 
hasIds() const353 bool InputFile::hasIds() const {
354   if (isObj())
355     return false;
356   return pdb().hasPDBIpiStream();
357 }
358 
isPdb() const359 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
360 
isObj() const361 bool InputFile::isObj() const {
362   return PdbOrObj.is<object::COFFObjectFile *>();
363 }
364 
isUnknown() const365 bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
366 
367 codeview::LazyRandomTypeCollection &
getOrCreateTypeCollection(TypeCollectionKind Kind)368 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
369   if (Types && Kind == kTypes)
370     return *Types;
371   if (Ids && Kind == kIds)
372     return *Ids;
373 
374   if (Kind == kIds) {
375     assert(isPdb() && pdb().hasPDBIpiStream());
376   }
377 
378   // If the collection was already initialized, we should have just returned it
379   // in step 1.
380   if (isPdb()) {
381     TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
382     auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
383                                            : pdb().getPDBTpiStream());
384 
385     auto &Array = Stream.typeArray();
386     uint32_t Count = Stream.getNumTypeRecords();
387     auto Offsets = Stream.getTypeIndexOffsets();
388     Collection =
389         std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
390     return *Collection;
391   }
392 
393   assert(isObj());
394   assert(Kind == kTypes);
395   assert(!Types);
396 
397   for (const auto &Section : obj().sections()) {
398     CVTypeArray Records;
399     if (!isDebugTSection(Section, Records))
400       continue;
401 
402     Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
403     return *Types;
404   }
405 
406   Types = std::make_unique<LazyRandomTypeCollection>(100);
407   return *Types;
408 }
409 
types()410 codeview::LazyRandomTypeCollection &InputFile::types() {
411   return getOrCreateTypeCollection(kTypes);
412 }
413 
ids()414 codeview::LazyRandomTypeCollection &InputFile::ids() {
415   // Object files have only one type stream that contains both types and ids.
416   // Similarly, some PDBs don't contain an IPI stream, and for those both types
417   // and IDs are in the same stream.
418   if (isObj() || !pdb().hasPDBIpiStream())
419     return types();
420 
421   return getOrCreateTypeCollection(kIds);
422 }
423 
symbol_groups()424 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
425   return make_range<SymbolGroupIterator>(symbol_groups_begin(),
426                                          symbol_groups_end());
427 }
428 
symbol_groups_begin()429 SymbolGroupIterator InputFile::symbol_groups_begin() {
430   return SymbolGroupIterator(*this);
431 }
432 
symbol_groups_end()433 SymbolGroupIterator InputFile::symbol_groups_end() {
434   return SymbolGroupIterator();
435 }
436 
SymbolGroupIterator()437 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
438 
SymbolGroupIterator(InputFile & File)439 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
440   if (File.isObj()) {
441     SectionIter = File.obj().section_begin();
442     scanToNextDebugS();
443   }
444 }
445 
operator ==(const SymbolGroupIterator & R) const446 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
447   bool E = isEnd();
448   bool RE = R.isEnd();
449   if (E || RE)
450     return E == RE;
451 
452   if (Value.File != R.Value.File)
453     return false;
454   return Index == R.Index;
455 }
456 
operator *() const457 const SymbolGroup &SymbolGroupIterator::operator*() const {
458   assert(!isEnd());
459   return Value;
460 }
operator *()461 SymbolGroup &SymbolGroupIterator::operator*() {
462   assert(!isEnd());
463   return Value;
464 }
465 
operator ++()466 SymbolGroupIterator &SymbolGroupIterator::operator++() {
467   assert(Value.File && !isEnd());
468   ++Index;
469   if (isEnd())
470     return *this;
471 
472   if (Value.File->isPdb()) {
473     Value.updatePdbModi(Index);
474     return *this;
475   }
476 
477   scanToNextDebugS();
478   return *this;
479 }
480 
scanToNextDebugS()481 void SymbolGroupIterator::scanToNextDebugS() {
482   assert(SectionIter.hasValue());
483   auto End = Value.File->obj().section_end();
484   auto &Iter = *SectionIter;
485   assert(!isEnd());
486 
487   while (++Iter != End) {
488     DebugSubsectionArray SS;
489     SectionRef SR = *Iter;
490     if (!isDebugSSection(SR, SS))
491       continue;
492 
493     Value.updateDebugS(SS);
494     return;
495   }
496 }
497 
isEnd() const498 bool SymbolGroupIterator::isEnd() const {
499   if (!Value.File)
500     return true;
501   if (Value.File->isPdb()) {
502     auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
503     uint32_t Count = Dbi.modules().getModuleCount();
504     assert(Index <= Count);
505     return Index == Count;
506   }
507 
508   assert(SectionIter.hasValue());
509   return *SectionIter == Value.File->obj().section_end();
510 }
511