1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/DebugInfo/MSF/MSFCommon.h"
13 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
14 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
15 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
16 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
17 #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
18 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/RawError.h"
21 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23 #include "llvm/Support/BinaryStream.h"
24 #include "llvm/Support/BinaryStreamArray.h"
25 #include "llvm/Support/BinaryStreamReader.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/Path.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstdint>
32
33 using namespace llvm;
34 using namespace llvm::codeview;
35 using namespace llvm::msf;
36 using namespace llvm::pdb;
37
38 namespace {
39 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40 } // end anonymous namespace
41
PDBFile(StringRef Path,std::unique_ptr<BinaryStream> PdbFileBuffer,BumpPtrAllocator & Allocator)42 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43 BumpPtrAllocator &Allocator)
44 : FilePath(std::string(Path)), Allocator(Allocator),
45 Buffer(std::move(PdbFileBuffer)) {}
46
47 PDBFile::~PDBFile() = default;
48
getFilePath() const49 StringRef PDBFile::getFilePath() const { return FilePath; }
50
getFileDirectory() const51 StringRef PDBFile::getFileDirectory() const {
52 return sys::path::parent_path(FilePath);
53 }
54
getBlockSize() const55 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
56
getFreeBlockMapBlock() const57 uint32_t PDBFile::getFreeBlockMapBlock() const {
58 return ContainerLayout.SB->FreeBlockMapBlock;
59 }
60
getBlockCount() const61 uint32_t PDBFile::getBlockCount() const {
62 return ContainerLayout.SB->NumBlocks;
63 }
64
getNumDirectoryBytes() const65 uint32_t PDBFile::getNumDirectoryBytes() const {
66 return ContainerLayout.SB->NumDirectoryBytes;
67 }
68
getBlockMapIndex() const69 uint32_t PDBFile::getBlockMapIndex() const {
70 return ContainerLayout.SB->BlockMapAddr;
71 }
72
getUnknown1() const73 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
74
getNumDirectoryBlocks() const75 uint32_t PDBFile::getNumDirectoryBlocks() const {
76 return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
77 ContainerLayout.SB->BlockSize);
78 }
79
getBlockMapOffset() const80 uint64_t PDBFile::getBlockMapOffset() const {
81 return (uint64_t)ContainerLayout.SB->BlockMapAddr *
82 ContainerLayout.SB->BlockSize;
83 }
84
getNumStreams() const85 uint32_t PDBFile::getNumStreams() const {
86 return ContainerLayout.StreamSizes.size();
87 }
88
getMaxStreamSize() const89 uint32_t PDBFile::getMaxStreamSize() const {
90 return *std::max_element(ContainerLayout.StreamSizes.begin(),
91 ContainerLayout.StreamSizes.end());
92 }
93
getStreamByteSize(uint32_t StreamIndex) const94 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
95 return ContainerLayout.StreamSizes[StreamIndex];
96 }
97
98 ArrayRef<support::ulittle32_t>
getStreamBlockList(uint32_t StreamIndex) const99 PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
100 return ContainerLayout.StreamMap[StreamIndex];
101 }
102
getFileSize() const103 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
104
getBlockData(uint32_t BlockIndex,uint32_t NumBytes) const105 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
106 uint32_t NumBytes) const {
107 uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
108
109 ArrayRef<uint8_t> Result;
110 if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
111 return std::move(EC);
112 return Result;
113 }
114
setBlockData(uint32_t BlockIndex,uint32_t Offset,ArrayRef<uint8_t> Data) const115 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
116 ArrayRef<uint8_t> Data) const {
117 return make_error<RawError>(raw_error_code::not_writable,
118 "PDBFile is immutable");
119 }
120
parseFileHeaders()121 Error PDBFile::parseFileHeaders() {
122 BinaryStreamReader Reader(*Buffer);
123
124 // Initialize SB.
125 const msf::SuperBlock *SB = nullptr;
126 if (auto EC = Reader.readObject(SB)) {
127 consumeError(std::move(EC));
128 return make_error<RawError>(raw_error_code::corrupt_file,
129 "MSF superblock is missing");
130 }
131
132 if (auto EC = msf::validateSuperBlock(*SB))
133 return EC;
134
135 if (Buffer->getLength() % SB->BlockSize != 0)
136 return make_error<RawError>(raw_error_code::corrupt_file,
137 "File size is not a multiple of block size");
138 ContainerLayout.SB = SB;
139
140 // Initialize Free Page Map.
141 ContainerLayout.FreePageMap.resize(SB->NumBlocks);
142 // The Fpm exists either at block 1 or block 2 of the MSF. However, this
143 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
144 // thusly an equal number of total blocks in the file. For a block size
145 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
146 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
147 // the Fpm is split across the file at `getBlockSize()` intervals. As a
148 // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
149 // for any non-negative integer k is an Fpm block. In theory, we only really
150 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
151 // current versions of the MSF format already expect the Fpm to be arranged
152 // at getBlockSize() intervals, so we have to be compatible.
153 // See the function fpmPn() for more information:
154 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
155 auto FpmStream =
156 MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
157 BinaryStreamReader FpmReader(*FpmStream);
158 ArrayRef<uint8_t> FpmBytes;
159 if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
160 return EC;
161 uint32_t BlocksRemaining = getBlockCount();
162 uint32_t BI = 0;
163 for (auto Byte : FpmBytes) {
164 uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
165 for (uint32_t I = 0; I < BlocksThisByte; ++I) {
166 if (Byte & (1 << I))
167 ContainerLayout.FreePageMap[BI] = true;
168 --BlocksRemaining;
169 ++BI;
170 }
171 }
172
173 Reader.setOffset(getBlockMapOffset());
174 if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
175 getNumDirectoryBlocks()))
176 return EC;
177
178 return Error::success();
179 }
180
parseStreamData()181 Error PDBFile::parseStreamData() {
182 assert(ContainerLayout.SB);
183 if (DirectoryStream)
184 return Error::success();
185
186 uint32_t NumStreams = 0;
187
188 // Normally you can't use a MappedBlockStream without having fully parsed the
189 // PDB file, because it accesses the directory and various other things, which
190 // is exactly what we are attempting to parse. By specifying a custom
191 // subclass of IPDBStreamData which only accesses the fields that have already
192 // been parsed, we can avoid this and reuse MappedBlockStream.
193 auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
194 Allocator);
195 BinaryStreamReader Reader(*DS);
196 if (auto EC = Reader.readInteger(NumStreams))
197 return EC;
198
199 if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
200 return EC;
201 for (uint32_t I = 0; I < NumStreams; ++I) {
202 uint32_t StreamSize = getStreamByteSize(I);
203 // FIXME: What does StreamSize ~0U mean?
204 uint64_t NumExpectedStreamBlocks =
205 StreamSize == UINT32_MAX
206 ? 0
207 : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
208
209 // For convenience, we store the block array contiguously. This is because
210 // if someone calls setStreamMap(), it is more convenient to be able to call
211 // it with an ArrayRef instead of setting up a StreamRef. Since the
212 // DirectoryStream is cached in the class and thus lives for the life of the
213 // class, we can be guaranteed that readArray() will return a stable
214 // reference, even if it has to allocate from its internal pool.
215 ArrayRef<support::ulittle32_t> Blocks;
216 if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
217 return EC;
218 for (uint32_t Block : Blocks) {
219 uint64_t BlockEndOffset =
220 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
221 if (BlockEndOffset > getFileSize())
222 return make_error<RawError>(raw_error_code::corrupt_file,
223 "Stream block map is corrupt.");
224 }
225 ContainerLayout.StreamMap.push_back(Blocks);
226 }
227
228 // We should have read exactly SB->NumDirectoryBytes bytes.
229 assert(Reader.bytesRemaining() == 0);
230 DirectoryStream = std::move(DS);
231 return Error::success();
232 }
233
getDirectoryBlockArray() const234 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
235 return ContainerLayout.DirectoryBlocks;
236 }
237
238 std::unique_ptr<MappedBlockStream>
createIndexedStream(uint16_t SN) const239 PDBFile::createIndexedStream(uint16_t SN) const {
240 if (SN == kInvalidStreamIndex)
241 return nullptr;
242 return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
243 Allocator);
244 }
245
getStreamLayout(uint32_t StreamIdx) const246 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
247 MSFStreamLayout Result;
248 auto Blocks = getStreamBlockList(StreamIdx);
249 Result.Blocks.assign(Blocks.begin(), Blocks.end());
250 Result.Length = getStreamByteSize(StreamIdx);
251 return Result;
252 }
253
getFpmStreamLayout() const254 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
255 return msf::getFpmStreamLayout(ContainerLayout);
256 }
257
getPDBGlobalsStream()258 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
259 if (!Globals) {
260 auto DbiS = getPDBDbiStream();
261 if (!DbiS)
262 return DbiS.takeError();
263
264 auto GlobalS =
265 safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
266 if (!GlobalS)
267 return GlobalS.takeError();
268 auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));
269 if (auto EC = TempGlobals->reload())
270 return std::move(EC);
271 Globals = std::move(TempGlobals);
272 }
273 return *Globals;
274 }
275
getPDBInfoStream()276 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
277 if (!Info) {
278 auto InfoS = safelyCreateIndexedStream(StreamPDB);
279 if (!InfoS)
280 return InfoS.takeError();
281 auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));
282 if (auto EC = TempInfo->reload())
283 return std::move(EC);
284 Info = std::move(TempInfo);
285 }
286 return *Info;
287 }
288
getPDBDbiStream()289 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
290 if (!Dbi) {
291 auto DbiS = safelyCreateIndexedStream(StreamDBI);
292 if (!DbiS)
293 return DbiS.takeError();
294 auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));
295 if (auto EC = TempDbi->reload(this))
296 return std::move(EC);
297 Dbi = std::move(TempDbi);
298 }
299 return *Dbi;
300 }
301
getPDBTpiStream()302 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
303 if (!Tpi) {
304 auto TpiS = safelyCreateIndexedStream(StreamTPI);
305 if (!TpiS)
306 return TpiS.takeError();
307 auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));
308 if (auto EC = TempTpi->reload())
309 return std::move(EC);
310 Tpi = std::move(TempTpi);
311 }
312 return *Tpi;
313 }
314
getPDBIpiStream()315 Expected<TpiStream &> PDBFile::getPDBIpiStream() {
316 if (!Ipi) {
317 if (!hasPDBIpiStream())
318 return make_error<RawError>(raw_error_code::no_stream);
319
320 auto IpiS = safelyCreateIndexedStream(StreamIPI);
321 if (!IpiS)
322 return IpiS.takeError();
323 auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));
324 if (auto EC = TempIpi->reload())
325 return std::move(EC);
326 Ipi = std::move(TempIpi);
327 }
328 return *Ipi;
329 }
330
getPDBPublicsStream()331 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
332 if (!Publics) {
333 auto DbiS = getPDBDbiStream();
334 if (!DbiS)
335 return DbiS.takeError();
336
337 auto PublicS =
338 safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
339 if (!PublicS)
340 return PublicS.takeError();
341 auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));
342 if (auto EC = TempPublics->reload())
343 return std::move(EC);
344 Publics = std::move(TempPublics);
345 }
346 return *Publics;
347 }
348
getPDBSymbolStream()349 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
350 if (!Symbols) {
351 auto DbiS = getPDBDbiStream();
352 if (!DbiS)
353 return DbiS.takeError();
354
355 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
356 auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
357 if (!SymbolS)
358 return SymbolS.takeError();
359
360 auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));
361 if (auto EC = TempSymbols->reload())
362 return std::move(EC);
363 Symbols = std::move(TempSymbols);
364 }
365 return *Symbols;
366 }
367
getStringTable()368 Expected<PDBStringTable &> PDBFile::getStringTable() {
369 if (!Strings) {
370 auto NS = safelyCreateNamedStream("/names");
371 if (!NS)
372 return NS.takeError();
373
374 auto N = std::make_unique<PDBStringTable>();
375 BinaryStreamReader Reader(**NS);
376 if (auto EC = N->reload(Reader))
377 return std::move(EC);
378 assert(Reader.bytesRemaining() == 0);
379 StringTableStream = std::move(*NS);
380 Strings = std::move(N);
381 }
382 return *Strings;
383 }
384
getInjectedSourceStream()385 Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
386 if (!InjectedSources) {
387 auto IJS = safelyCreateNamedStream("/src/headerblock");
388 if (!IJS)
389 return IJS.takeError();
390
391 auto Strings = getStringTable();
392 if (!Strings)
393 return Strings.takeError();
394
395 auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));
396 if (auto EC = IJ->reload(*Strings))
397 return std::move(EC);
398 InjectedSources = std::move(IJ);
399 }
400 return *InjectedSources;
401 }
402
getPointerSize()403 uint32_t PDBFile::getPointerSize() {
404 auto DbiS = getPDBDbiStream();
405 if (!DbiS)
406 return 0;
407 PDB_Machine Machine = DbiS->getMachineType();
408 if (Machine == PDB_Machine::Amd64)
409 return 8;
410 return 4;
411 }
412
hasPDBDbiStream() const413 bool PDBFile::hasPDBDbiStream() const {
414 return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
415 }
416
hasPDBGlobalsStream()417 bool PDBFile::hasPDBGlobalsStream() {
418 auto DbiS = getPDBDbiStream();
419 if (!DbiS) {
420 consumeError(DbiS.takeError());
421 return false;
422 }
423
424 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
425 }
426
hasPDBInfoStream() const427 bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
428
hasPDBIpiStream() const429 bool PDBFile::hasPDBIpiStream() const {
430 if (!hasPDBInfoStream())
431 return false;
432
433 if (StreamIPI >= getNumStreams())
434 return false;
435
436 auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
437 return InfoStream.containsIdStream();
438 }
439
hasPDBPublicsStream()440 bool PDBFile::hasPDBPublicsStream() {
441 auto DbiS = getPDBDbiStream();
442 if (!DbiS) {
443 consumeError(DbiS.takeError());
444 return false;
445 }
446 return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
447 }
448
hasPDBSymbolStream()449 bool PDBFile::hasPDBSymbolStream() {
450 auto DbiS = getPDBDbiStream();
451 if (!DbiS)
452 return false;
453 return DbiS->getSymRecordStreamIndex() < getNumStreams();
454 }
455
hasPDBTpiStream() const456 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
457
hasPDBStringTable()458 bool PDBFile::hasPDBStringTable() {
459 auto IS = getPDBInfoStream();
460 if (!IS)
461 return false;
462 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
463 if (!ExpectedNSI) {
464 consumeError(ExpectedNSI.takeError());
465 return false;
466 }
467 assert(*ExpectedNSI < getNumStreams());
468 return true;
469 }
470
hasPDBInjectedSourceStream()471 bool PDBFile::hasPDBInjectedSourceStream() {
472 auto IS = getPDBInfoStream();
473 if (!IS)
474 return false;
475 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
476 if (!ExpectedNSI) {
477 consumeError(ExpectedNSI.takeError());
478 return false;
479 }
480 assert(*ExpectedNSI < getNumStreams());
481 return true;
482 }
483
484 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
485 /// stream with that index actually exists. If it does not, the return value
486 /// will have an MSFError with code msf_error_code::no_stream. Else, the return
487 /// value will contain the stream returned by createIndexedStream().
488 Expected<std::unique_ptr<MappedBlockStream>>
safelyCreateIndexedStream(uint32_t StreamIndex) const489 PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
490 if (StreamIndex >= getNumStreams())
491 // This rejects kInvalidStreamIndex with an error as well.
492 return make_error<RawError>(raw_error_code::no_stream);
493 return createIndexedStream(StreamIndex);
494 }
495
496 Expected<std::unique_ptr<MappedBlockStream>>
safelyCreateNamedStream(StringRef Name)497 PDBFile::safelyCreateNamedStream(StringRef Name) {
498 auto IS = getPDBInfoStream();
499 if (!IS)
500 return IS.takeError();
501
502 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
503 if (!ExpectedNSI)
504 return ExpectedNSI.takeError();
505 uint32_t NameStreamIndex = *ExpectedNSI;
506
507 return safelyCreateIndexedStream(NameStreamIndex);
508 }
509