1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/DebugInfo/MSF/MSFCommon.h"
12 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
13 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
14 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
15 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
16 #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
17 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
18 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
19 #include "llvm/DebugInfo/PDB/Native/RawError.h"
20 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
21 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
22 #include "llvm/Support/BinaryStream.h"
23 #include "llvm/Support/BinaryStreamArray.h"
24 #include "llvm/Support/BinaryStreamReader.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/Error.h"
27 #include "llvm/Support/Path.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstdint>
31 
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace llvm::msf;
35 using namespace llvm::pdb;
36 
37 namespace {
38 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
39 } // end anonymous namespace
40 
PDBFile(StringRef Path,std::unique_ptr<BinaryStream> PdbFileBuffer,BumpPtrAllocator & Allocator)41 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
42                  BumpPtrAllocator &Allocator)
43     : FilePath(std::string(Path)), Allocator(Allocator),
44       Buffer(std::move(PdbFileBuffer)) {}
45 
46 PDBFile::~PDBFile() = default;
47 
getFilePath() const48 StringRef PDBFile::getFilePath() const { return FilePath; }
49 
getFileDirectory() const50 StringRef PDBFile::getFileDirectory() const {
51   return sys::path::parent_path(FilePath);
52 }
53 
getBlockSize() const54 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
55 
getFreeBlockMapBlock() const56 uint32_t PDBFile::getFreeBlockMapBlock() const {
57   return ContainerLayout.SB->FreeBlockMapBlock;
58 }
59 
getBlockCount() const60 uint32_t PDBFile::getBlockCount() const {
61   return ContainerLayout.SB->NumBlocks;
62 }
63 
getNumDirectoryBytes() const64 uint32_t PDBFile::getNumDirectoryBytes() const {
65   return ContainerLayout.SB->NumDirectoryBytes;
66 }
67 
getBlockMapIndex() const68 uint32_t PDBFile::getBlockMapIndex() const {
69   return ContainerLayout.SB->BlockMapAddr;
70 }
71 
getUnknown1() const72 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
73 
getNumDirectoryBlocks() const74 uint32_t PDBFile::getNumDirectoryBlocks() const {
75   return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
76                             ContainerLayout.SB->BlockSize);
77 }
78 
getBlockMapOffset() const79 uint64_t PDBFile::getBlockMapOffset() const {
80   return (uint64_t)ContainerLayout.SB->BlockMapAddr *
81          ContainerLayout.SB->BlockSize;
82 }
83 
getNumStreams() const84 uint32_t PDBFile::getNumStreams() const {
85   return ContainerLayout.StreamSizes.size();
86 }
87 
getMaxStreamSize() const88 uint32_t PDBFile::getMaxStreamSize() const {
89   return *std::max_element(ContainerLayout.StreamSizes.begin(),
90                            ContainerLayout.StreamSizes.end());
91 }
92 
getStreamByteSize(uint32_t StreamIndex) const93 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
94   return ContainerLayout.StreamSizes[StreamIndex];
95 }
96 
97 ArrayRef<support::ulittle32_t>
getStreamBlockList(uint32_t StreamIndex) const98 PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
99   return ContainerLayout.StreamMap[StreamIndex];
100 }
101 
getFileSize() const102 uint64_t PDBFile::getFileSize() const { return Buffer->getLength(); }
103 
getBlockData(uint32_t BlockIndex,uint32_t NumBytes) const104 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
105                                                   uint32_t NumBytes) const {
106   uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
107 
108   ArrayRef<uint8_t> Result;
109   if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
110     return std::move(EC);
111   return Result;
112 }
113 
setBlockData(uint32_t BlockIndex,uint32_t Offset,ArrayRef<uint8_t> Data) const114 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
115                             ArrayRef<uint8_t> Data) const {
116   return make_error<RawError>(raw_error_code::not_writable,
117                               "PDBFile is immutable");
118 }
119 
parseFileHeaders()120 Error PDBFile::parseFileHeaders() {
121   BinaryStreamReader Reader(*Buffer);
122 
123   // Initialize SB.
124   const msf::SuperBlock *SB = nullptr;
125   if (auto EC = Reader.readObject(SB)) {
126     consumeError(std::move(EC));
127     return make_error<RawError>(raw_error_code::corrupt_file,
128                                 "MSF superblock is missing");
129   }
130 
131   if (auto EC = msf::validateSuperBlock(*SB))
132     return EC;
133 
134   if (Buffer->getLength() % SB->BlockSize != 0)
135     return make_error<RawError>(raw_error_code::corrupt_file,
136                                 "File size is not a multiple of block size");
137   ContainerLayout.SB = SB;
138 
139   // Initialize Free Page Map.
140   ContainerLayout.FreePageMap.resize(SB->NumBlocks);
141   // The Fpm exists either at block 1 or block 2 of the MSF.  However, this
142   // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
143   // thusly an equal number of total blocks in the file.  For a block size
144   // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
145   // maximum file size of 32KiB * 4KiB = 128MiB.  Obviously this won't do, so
146   // the Fpm is split across the file at `getBlockSize()` intervals.  As a
147   // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
148   // for any non-negative integer k is an Fpm block.  In theory, we only really
149   // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
150   // current versions of the MSF format already expect the Fpm to be arranged
151   // at getBlockSize() intervals, so we have to be compatible.
152   // See the function fpmPn() for more information:
153   // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
154   auto FpmStream =
155       MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
156   BinaryStreamReader FpmReader(*FpmStream);
157   ArrayRef<uint8_t> FpmBytes;
158   if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
159     return EC;
160   uint32_t BlocksRemaining = getBlockCount();
161   uint32_t BI = 0;
162   for (auto Byte : FpmBytes) {
163     uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
164     for (uint32_t I = 0; I < BlocksThisByte; ++I) {
165       if (Byte & (1 << I))
166         ContainerLayout.FreePageMap[BI] = true;
167       --BlocksRemaining;
168       ++BI;
169     }
170   }
171 
172   Reader.setOffset(getBlockMapOffset());
173   if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
174                                  getNumDirectoryBlocks()))
175     return EC;
176 
177   return Error::success();
178 }
179 
parseStreamData()180 Error PDBFile::parseStreamData() {
181   assert(ContainerLayout.SB);
182   if (DirectoryStream)
183     return Error::success();
184 
185   uint32_t NumStreams = 0;
186 
187   // Normally you can't use a MappedBlockStream without having fully parsed the
188   // PDB file, because it accesses the directory and various other things, which
189   // is exactly what we are attempting to parse.  By specifying a custom
190   // subclass of IPDBStreamData which only accesses the fields that have already
191   // been parsed, we can avoid this and reuse MappedBlockStream.
192   auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
193                                                      Allocator);
194   BinaryStreamReader Reader(*DS);
195   if (auto EC = Reader.readInteger(NumStreams))
196     return EC;
197 
198   if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
199     return EC;
200   for (uint32_t I = 0; I < NumStreams; ++I) {
201     uint32_t StreamSize = getStreamByteSize(I);
202     // FIXME: What does StreamSize ~0U mean?
203     uint64_t NumExpectedStreamBlocks =
204         StreamSize == UINT32_MAX
205             ? 0
206             : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
207 
208     // For convenience, we store the block array contiguously.  This is because
209     // if someone calls setStreamMap(), it is more convenient to be able to call
210     // it with an ArrayRef instead of setting up a StreamRef.  Since the
211     // DirectoryStream is cached in the class and thus lives for the life of the
212     // class, we can be guaranteed that readArray() will return a stable
213     // reference, even if it has to allocate from its internal pool.
214     ArrayRef<support::ulittle32_t> Blocks;
215     if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
216       return EC;
217     for (uint32_t Block : Blocks) {
218       uint64_t BlockEndOffset =
219           (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
220       if (BlockEndOffset > getFileSize())
221         return make_error<RawError>(raw_error_code::corrupt_file,
222                                     "Stream block map is corrupt.");
223     }
224     ContainerLayout.StreamMap.push_back(Blocks);
225   }
226 
227   // We should have read exactly SB->NumDirectoryBytes bytes.
228   assert(Reader.bytesRemaining() == 0);
229   DirectoryStream = std::move(DS);
230   return Error::success();
231 }
232 
getDirectoryBlockArray() const233 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
234   return ContainerLayout.DirectoryBlocks;
235 }
236 
237 std::unique_ptr<MappedBlockStream>
createIndexedStream(uint16_t SN) const238 PDBFile::createIndexedStream(uint16_t SN) const {
239   if (SN == kInvalidStreamIndex)
240     return nullptr;
241   return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
242                                                 Allocator);
243 }
244 
getStreamLayout(uint32_t StreamIdx) const245 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
246   MSFStreamLayout Result;
247   auto Blocks = getStreamBlockList(StreamIdx);
248   Result.Blocks.assign(Blocks.begin(), Blocks.end());
249   Result.Length = getStreamByteSize(StreamIdx);
250   return Result;
251 }
252 
getFpmStreamLayout() const253 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
254   return msf::getFpmStreamLayout(ContainerLayout);
255 }
256 
getPDBGlobalsStream()257 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
258   if (!Globals) {
259     auto DbiS = getPDBDbiStream();
260     if (!DbiS)
261       return DbiS.takeError();
262 
263     auto GlobalS =
264         safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
265     if (!GlobalS)
266       return GlobalS.takeError();
267     auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));
268     if (auto EC = TempGlobals->reload())
269       return std::move(EC);
270     Globals = std::move(TempGlobals);
271   }
272   return *Globals;
273 }
274 
getPDBInfoStream()275 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
276   if (!Info) {
277     auto InfoS = safelyCreateIndexedStream(StreamPDB);
278     if (!InfoS)
279       return InfoS.takeError();
280     auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));
281     if (auto EC = TempInfo->reload())
282       return std::move(EC);
283     Info = std::move(TempInfo);
284   }
285   return *Info;
286 }
287 
getPDBDbiStream()288 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
289   if (!Dbi) {
290     auto DbiS = safelyCreateIndexedStream(StreamDBI);
291     if (!DbiS)
292       return DbiS.takeError();
293     auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));
294     if (auto EC = TempDbi->reload(this))
295       return std::move(EC);
296     Dbi = std::move(TempDbi);
297   }
298   return *Dbi;
299 }
300 
getPDBTpiStream()301 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
302   if (!Tpi) {
303     auto TpiS = safelyCreateIndexedStream(StreamTPI);
304     if (!TpiS)
305       return TpiS.takeError();
306     auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));
307     if (auto EC = TempTpi->reload())
308       return std::move(EC);
309     Tpi = std::move(TempTpi);
310   }
311   return *Tpi;
312 }
313 
getPDBIpiStream()314 Expected<TpiStream &> PDBFile::getPDBIpiStream() {
315   if (!Ipi) {
316     if (!hasPDBIpiStream())
317       return make_error<RawError>(raw_error_code::no_stream);
318 
319     auto IpiS = safelyCreateIndexedStream(StreamIPI);
320     if (!IpiS)
321       return IpiS.takeError();
322     auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));
323     if (auto EC = TempIpi->reload())
324       return std::move(EC);
325     Ipi = std::move(TempIpi);
326   }
327   return *Ipi;
328 }
329 
getPDBPublicsStream()330 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
331   if (!Publics) {
332     auto DbiS = getPDBDbiStream();
333     if (!DbiS)
334       return DbiS.takeError();
335 
336     auto PublicS =
337         safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
338     if (!PublicS)
339       return PublicS.takeError();
340     auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));
341     if (auto EC = TempPublics->reload())
342       return std::move(EC);
343     Publics = std::move(TempPublics);
344   }
345   return *Publics;
346 }
347 
getPDBSymbolStream()348 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
349   if (!Symbols) {
350     auto DbiS = getPDBDbiStream();
351     if (!DbiS)
352       return DbiS.takeError();
353 
354     uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
355     auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
356     if (!SymbolS)
357       return SymbolS.takeError();
358 
359     auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));
360     if (auto EC = TempSymbols->reload())
361       return std::move(EC);
362     Symbols = std::move(TempSymbols);
363   }
364   return *Symbols;
365 }
366 
getStringTable()367 Expected<PDBStringTable &> PDBFile::getStringTable() {
368   if (!Strings) {
369     auto NS = safelyCreateNamedStream("/names");
370     if (!NS)
371       return NS.takeError();
372 
373     auto N = std::make_unique<PDBStringTable>();
374     BinaryStreamReader Reader(**NS);
375     if (auto EC = N->reload(Reader))
376       return std::move(EC);
377     assert(Reader.bytesRemaining() == 0);
378     StringTableStream = std::move(*NS);
379     Strings = std::move(N);
380   }
381   return *Strings;
382 }
383 
getInjectedSourceStream()384 Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
385   if (!InjectedSources) {
386     auto IJS = safelyCreateNamedStream("/src/headerblock");
387     if (!IJS)
388       return IJS.takeError();
389 
390     auto Strings = getStringTable();
391     if (!Strings)
392       return Strings.takeError();
393 
394     auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));
395     if (auto EC = IJ->reload(*Strings))
396       return std::move(EC);
397     InjectedSources = std::move(IJ);
398   }
399   return *InjectedSources;
400 }
401 
getPointerSize()402 uint32_t PDBFile::getPointerSize() {
403   auto DbiS = getPDBDbiStream();
404   if (!DbiS)
405     return 0;
406   PDB_Machine Machine = DbiS->getMachineType();
407   if (Machine == PDB_Machine::Amd64)
408     return 8;
409   return 4;
410 }
411 
hasPDBDbiStream() const412 bool PDBFile::hasPDBDbiStream() const {
413   return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
414 }
415 
hasPDBGlobalsStream()416 bool PDBFile::hasPDBGlobalsStream() {
417   auto DbiS = getPDBDbiStream();
418   if (!DbiS) {
419     consumeError(DbiS.takeError());
420     return false;
421   }
422 
423   return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
424 }
425 
hasPDBInfoStream() const426 bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
427 
hasPDBIpiStream() const428 bool PDBFile::hasPDBIpiStream() const {
429   if (!hasPDBInfoStream())
430     return false;
431 
432   if (StreamIPI >= getNumStreams())
433     return false;
434 
435   auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
436   return InfoStream.containsIdStream();
437 }
438 
hasPDBPublicsStream()439 bool PDBFile::hasPDBPublicsStream() {
440   auto DbiS = getPDBDbiStream();
441   if (!DbiS) {
442     consumeError(DbiS.takeError());
443     return false;
444   }
445   return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
446 }
447 
hasPDBSymbolStream()448 bool PDBFile::hasPDBSymbolStream() {
449   auto DbiS = getPDBDbiStream();
450   if (!DbiS)
451     return false;
452   return DbiS->getSymRecordStreamIndex() < getNumStreams();
453 }
454 
hasPDBTpiStream() const455 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
456 
hasPDBStringTable()457 bool PDBFile::hasPDBStringTable() {
458   auto IS = getPDBInfoStream();
459   if (!IS)
460     return false;
461   Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
462   if (!ExpectedNSI) {
463     consumeError(ExpectedNSI.takeError());
464     return false;
465   }
466   assert(*ExpectedNSI < getNumStreams());
467   return true;
468 }
469 
hasPDBInjectedSourceStream()470 bool PDBFile::hasPDBInjectedSourceStream() {
471   auto IS = getPDBInfoStream();
472   if (!IS)
473     return false;
474   Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
475   if (!ExpectedNSI) {
476     consumeError(ExpectedNSI.takeError());
477     return false;
478   }
479   assert(*ExpectedNSI < getNumStreams());
480   return true;
481 }
482 
483 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
484 /// stream with that index actually exists.  If it does not, the return value
485 /// will have an MSFError with code msf_error_code::no_stream.  Else, the return
486 /// value will contain the stream returned by createIndexedStream().
487 Expected<std::unique_ptr<MappedBlockStream>>
safelyCreateIndexedStream(uint32_t StreamIndex) const488 PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
489   if (StreamIndex >= getNumStreams())
490     // This rejects kInvalidStreamIndex with an error as well.
491     return make_error<RawError>(raw_error_code::no_stream);
492   return createIndexedStream(StreamIndex);
493 }
494 
495 Expected<std::unique_ptr<MappedBlockStream>>
safelyCreateNamedStream(StringRef Name)496 PDBFile::safelyCreateNamedStream(StringRef Name) {
497   auto IS = getPDBInfoStream();
498   if (!IS)
499     return IS.takeError();
500 
501   Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
502   if (!ExpectedNSI)
503     return ExpectedNSI.takeError();
504   uint32_t NameStreamIndex = *ExpectedNSI;
505 
506   return safelyCreateIndexedStream(NameStreamIndex);
507 }
508