1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/DebugInfo/MSF/MSFCommon.h"
13 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
14 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
15 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
16 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
17 #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
18 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/RawError.h"
21 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23 #include "llvm/Support/BinaryStream.h"
24 #include "llvm/Support/BinaryStreamArray.h"
25 #include "llvm/Support/BinaryStreamReader.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/Path.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstdint>
32 
33 using namespace llvm;
34 using namespace llvm::codeview;
35 using namespace llvm::msf;
36 using namespace llvm::pdb;
37 
38 namespace {
39 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40 } // end anonymous namespace
41 
42 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43                  BumpPtrAllocator &Allocator)
44     : FilePath(std::string(Path)), Allocator(Allocator),
45       Buffer(std::move(PdbFileBuffer)) {}
46 
/// Destroys the PDBFile using the compiler-generated member-wise teardown.
// NOTE(review): presumably defined out of line so the header can hold
// std::unique_ptr members to forward-declared stream types -- confirm against
// PDBFile.h.
PDBFile::~PDBFile() = default;
48 
49 StringRef PDBFile::getFilePath() const { return FilePath; }
50 
51 StringRef PDBFile::getFileDirectory() const {
52   return sys::path::parent_path(FilePath);
53 }
54 
55 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
56 
57 uint32_t PDBFile::getFreeBlockMapBlock() const {
58   return ContainerLayout.SB->FreeBlockMapBlock;
59 }
60 
61 uint32_t PDBFile::getBlockCount() const {
62   return ContainerLayout.SB->NumBlocks;
63 }
64 
65 uint32_t PDBFile::getNumDirectoryBytes() const {
66   return ContainerLayout.SB->NumDirectoryBytes;
67 }
68 
69 uint32_t PDBFile::getBlockMapIndex() const {
70   return ContainerLayout.SB->BlockMapAddr;
71 }
72 
73 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
74 
75 uint32_t PDBFile::getNumDirectoryBlocks() const {
76   return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
77                             ContainerLayout.SB->BlockSize);
78 }
79 
80 uint64_t PDBFile::getBlockMapOffset() const {
81   return (uint64_t)ContainerLayout.SB->BlockMapAddr *
82          ContainerLayout.SB->BlockSize;
83 }
84 
85 uint32_t PDBFile::getNumStreams() const {
86   return ContainerLayout.StreamSizes.size();
87 }
88 
89 uint32_t PDBFile::getMaxStreamSize() const {
90   return *std::max_element(ContainerLayout.StreamSizes.begin(),
91                            ContainerLayout.StreamSizes.end());
92 }
93 
94 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
95   return ContainerLayout.StreamSizes[StreamIndex];
96 }
97 
98 ArrayRef<support::ulittle32_t>
99 PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
100   return ContainerLayout.StreamMap[StreamIndex];
101 }
102 
103 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
104 
105 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
106                                                   uint32_t NumBytes) const {
107   uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
108 
109   ArrayRef<uint8_t> Result;
110   if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
111     return std::move(EC);
112   return Result;
113 }
114 
115 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
116                             ArrayRef<uint8_t> Data) const {
117   return make_error<RawError>(raw_error_code::not_writable,
118                               "PDBFile is immutable");
119 }
120 
121 Error PDBFile::parseFileHeaders() {
122   BinaryStreamReader Reader(*Buffer);
123 
124   // Initialize SB.
125   const msf::SuperBlock *SB = nullptr;
126   if (auto EC = Reader.readObject(SB)) {
127     consumeError(std::move(EC));
128     return make_error<RawError>(raw_error_code::corrupt_file,
129                                 "MSF superblock is missing");
130   }
131 
132   if (auto EC = msf::validateSuperBlock(*SB))
133     return EC;
134 
135   if (Buffer->getLength() % SB->BlockSize != 0)
136     return make_error<RawError>(raw_error_code::corrupt_file,
137                                 "File size is not a multiple of block size");
138   ContainerLayout.SB = SB;
139 
140   // Initialize Free Page Map.
141   ContainerLayout.FreePageMap.resize(SB->NumBlocks);
142   // The Fpm exists either at block 1 or block 2 of the MSF.  However, this
143   // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
144   // thusly an equal number of total blocks in the file.  For a block size
145   // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
146   // maximum file size of 32KiB * 4KiB = 128MiB.  Obviously this won't do, so
147   // the Fpm is split across the file at `getBlockSize()` intervals.  As a
148   // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
149   // for any non-negative integer k is an Fpm block.  In theory, we only really
150   // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
151   // current versions of the MSF format already expect the Fpm to be arranged
152   // at getBlockSize() intervals, so we have to be compatible.
153   // See the function fpmPn() for more information:
154   // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
155   auto FpmStream =
156       MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
157   BinaryStreamReader FpmReader(*FpmStream);
158   ArrayRef<uint8_t> FpmBytes;
159   if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
160     return EC;
161   uint32_t BlocksRemaining = getBlockCount();
162   uint32_t BI = 0;
163   for (auto Byte : FpmBytes) {
164     uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
165     for (uint32_t I = 0; I < BlocksThisByte; ++I) {
166       if (Byte & (1 << I))
167         ContainerLayout.FreePageMap[BI] = true;
168       --BlocksRemaining;
169       ++BI;
170     }
171   }
172 
173   Reader.setOffset(getBlockMapOffset());
174   if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
175                                  getNumDirectoryBlocks()))
176     return EC;
177 
178   return Error::success();
179 }
180 
181 Error PDBFile::parseStreamData() {
182   assert(ContainerLayout.SB);
183   if (DirectoryStream)
184     return Error::success();
185 
186   uint32_t NumStreams = 0;
187 
188   // Normally you can't use a MappedBlockStream without having fully parsed the
189   // PDB file, because it accesses the directory and various other things, which
190   // is exactly what we are attempting to parse.  By specifying a custom
191   // subclass of IPDBStreamData which only accesses the fields that have already
192   // been parsed, we can avoid this and reuse MappedBlockStream.
193   auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
194                                                      Allocator);
195   BinaryStreamReader Reader(*DS);
196   if (auto EC = Reader.readInteger(NumStreams))
197     return EC;
198 
199   if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
200     return EC;
201   for (uint32_t I = 0; I < NumStreams; ++I) {
202     uint32_t StreamSize = getStreamByteSize(I);
203     // FIXME: What does StreamSize ~0U mean?
204     uint64_t NumExpectedStreamBlocks =
205         StreamSize == UINT32_MAX
206             ? 0
207             : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
208 
209     // For convenience, we store the block array contiguously.  This is because
210     // if someone calls setStreamMap(), it is more convenient to be able to call
211     // it with an ArrayRef instead of setting up a StreamRef.  Since the
212     // DirectoryStream is cached in the class and thus lives for the life of the
213     // class, we can be guaranteed that readArray() will return a stable
214     // reference, even if it has to allocate from its internal pool.
215     ArrayRef<support::ulittle32_t> Blocks;
216     if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
217       return EC;
218     for (uint32_t Block : Blocks) {
219       uint64_t BlockEndOffset =
220           (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
221       if (BlockEndOffset > getFileSize())
222         return make_error<RawError>(raw_error_code::corrupt_file,
223                                     "Stream block map is corrupt.");
224     }
225     ContainerLayout.StreamMap.push_back(Blocks);
226   }
227 
228   // We should have read exactly SB->NumDirectoryBytes bytes.
229   assert(Reader.bytesRemaining() == 0);
230   DirectoryStream = std::move(DS);
231   return Error::success();
232 }
233 
234 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
235   return ContainerLayout.DirectoryBlocks;
236 }
237 
238 std::unique_ptr<MappedBlockStream>
239 PDBFile::createIndexedStream(uint16_t SN) const {
240   if (SN == kInvalidStreamIndex)
241     return nullptr;
242   return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
243                                                 Allocator);
244 }
245 
246 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
247   MSFStreamLayout Result;
248   auto Blocks = getStreamBlockList(StreamIdx);
249   Result.Blocks.assign(Blocks.begin(), Blocks.end());
250   Result.Length = getStreamByteSize(StreamIdx);
251   return Result;
252 }
253 
254 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
255   return msf::getFpmStreamLayout(ContainerLayout);
256 }
257 
258 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
259   if (!Globals) {
260     auto DbiS = getPDBDbiStream();
261     if (!DbiS)
262       return DbiS.takeError();
263 
264     auto GlobalS =
265         safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
266     if (!GlobalS)
267       return GlobalS.takeError();
268     auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));
269     if (auto EC = TempGlobals->reload())
270       return std::move(EC);
271     Globals = std::move(TempGlobals);
272   }
273   return *Globals;
274 }
275 
276 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
277   if (!Info) {
278     auto InfoS = safelyCreateIndexedStream(StreamPDB);
279     if (!InfoS)
280       return InfoS.takeError();
281     auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));
282     if (auto EC = TempInfo->reload())
283       return std::move(EC);
284     Info = std::move(TempInfo);
285   }
286   return *Info;
287 }
288 
289 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
290   if (!Dbi) {
291     auto DbiS = safelyCreateIndexedStream(StreamDBI);
292     if (!DbiS)
293       return DbiS.takeError();
294     auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));
295     if (auto EC = TempDbi->reload(this))
296       return std::move(EC);
297     Dbi = std::move(TempDbi);
298   }
299   return *Dbi;
300 }
301 
302 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
303   if (!Tpi) {
304     auto TpiS = safelyCreateIndexedStream(StreamTPI);
305     if (!TpiS)
306       return TpiS.takeError();
307     auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));
308     if (auto EC = TempTpi->reload())
309       return std::move(EC);
310     Tpi = std::move(TempTpi);
311   }
312   return *Tpi;
313 }
314 
315 Expected<TpiStream &> PDBFile::getPDBIpiStream() {
316   if (!Ipi) {
317     if (!hasPDBIpiStream())
318       return make_error<RawError>(raw_error_code::no_stream);
319 
320     auto IpiS = safelyCreateIndexedStream(StreamIPI);
321     if (!IpiS)
322       return IpiS.takeError();
323     auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));
324     if (auto EC = TempIpi->reload())
325       return std::move(EC);
326     Ipi = std::move(TempIpi);
327   }
328   return *Ipi;
329 }
330 
331 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
332   if (!Publics) {
333     auto DbiS = getPDBDbiStream();
334     if (!DbiS)
335       return DbiS.takeError();
336 
337     auto PublicS =
338         safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
339     if (!PublicS)
340       return PublicS.takeError();
341     auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));
342     if (auto EC = TempPublics->reload())
343       return std::move(EC);
344     Publics = std::move(TempPublics);
345   }
346   return *Publics;
347 }
348 
349 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
350   if (!Symbols) {
351     auto DbiS = getPDBDbiStream();
352     if (!DbiS)
353       return DbiS.takeError();
354 
355     uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
356     auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
357     if (!SymbolS)
358       return SymbolS.takeError();
359 
360     auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));
361     if (auto EC = TempSymbols->reload())
362       return std::move(EC);
363     Symbols = std::move(TempSymbols);
364   }
365   return *Symbols;
366 }
367 
368 Expected<PDBStringTable &> PDBFile::getStringTable() {
369   if (!Strings) {
370     auto NS = safelyCreateNamedStream("/names");
371     if (!NS)
372       return NS.takeError();
373 
374     auto N = std::make_unique<PDBStringTable>();
375     BinaryStreamReader Reader(**NS);
376     if (auto EC = N->reload(Reader))
377       return std::move(EC);
378     assert(Reader.bytesRemaining() == 0);
379     StringTableStream = std::move(*NS);
380     Strings = std::move(N);
381   }
382   return *Strings;
383 }
384 
385 Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
386   if (!InjectedSources) {
387     auto IJS = safelyCreateNamedStream("/src/headerblock");
388     if (!IJS)
389       return IJS.takeError();
390 
391     auto Strings = getStringTable();
392     if (!Strings)
393       return Strings.takeError();
394 
395     auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));
396     if (auto EC = IJ->reload(*Strings))
397       return std::move(EC);
398     InjectedSources = std::move(IJ);
399   }
400   return *InjectedSources;
401 }
402 
403 uint32_t PDBFile::getPointerSize() {
404   auto DbiS = getPDBDbiStream();
405   if (!DbiS)
406     return 0;
407   PDB_Machine Machine = DbiS->getMachineType();
408   if (Machine == PDB_Machine::Amd64)
409     return 8;
410   return 4;
411 }
412 
413 bool PDBFile::hasPDBDbiStream() const {
414   return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
415 }
416 
417 bool PDBFile::hasPDBGlobalsStream() {
418   auto DbiS = getPDBDbiStream();
419   if (!DbiS) {
420     consumeError(DbiS.takeError());
421     return false;
422   }
423 
424   return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
425 }
426 
427 bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
428 
429 bool PDBFile::hasPDBIpiStream() const {
430   if (!hasPDBInfoStream())
431     return false;
432 
433   if (StreamIPI >= getNumStreams())
434     return false;
435 
436   auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
437   return InfoStream.containsIdStream();
438 }
439 
440 bool PDBFile::hasPDBPublicsStream() {
441   auto DbiS = getPDBDbiStream();
442   if (!DbiS) {
443     consumeError(DbiS.takeError());
444     return false;
445   }
446   return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
447 }
448 
449 bool PDBFile::hasPDBSymbolStream() {
450   auto DbiS = getPDBDbiStream();
451   if (!DbiS)
452     return false;
453   return DbiS->getSymRecordStreamIndex() < getNumStreams();
454 }
455 
456 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
457 
458 bool PDBFile::hasPDBStringTable() {
459   auto IS = getPDBInfoStream();
460   if (!IS)
461     return false;
462   Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
463   if (!ExpectedNSI) {
464     consumeError(ExpectedNSI.takeError());
465     return false;
466   }
467   assert(*ExpectedNSI < getNumStreams());
468   return true;
469 }
470 
471 bool PDBFile::hasPDBInjectedSourceStream() {
472   auto IS = getPDBInfoStream();
473   if (!IS)
474     return false;
475   Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
476   if (!ExpectedNSI) {
477     consumeError(ExpectedNSI.takeError());
478     return false;
479   }
480   assert(*ExpectedNSI < getNumStreams());
481   return true;
482 }
483 
484 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
485 /// stream with that index actually exists.  If it does not, the return value
486 /// will have an MSFError with code msf_error_code::no_stream.  Else, the return
487 /// value will contain the stream returned by createIndexedStream().
488 Expected<std::unique_ptr<MappedBlockStream>>
489 PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
490   if (StreamIndex >= getNumStreams())
491     // This rejects kInvalidStreamIndex with an error as well.
492     return make_error<RawError>(raw_error_code::no_stream);
493   return createIndexedStream(StreamIndex);
494 }
495 
496 Expected<std::unique_ptr<MappedBlockStream>>
497 PDBFile::safelyCreateNamedStream(StringRef Name) {
498   auto IS = getPDBInfoStream();
499   if (!IS)
500     return IS.takeError();
501 
502   Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
503   if (!ExpectedNSI)
504     return ExpectedNSI.takeError();
505   uint32_t NameStreamIndex = *ExpectedNSI;
506 
507   return safelyCreateIndexedStream(NameStreamIndex);
508 }
509