1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header defines the BitstreamReader class. This class can be used to 10 // read an arbitrary bitstream, regardless of its contents. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_BITSTREAM_BITSTREAMREADER_H 15 #define LLVM_BITSTREAM_BITSTREAMREADER_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/Bitstream/BitCodes.h" 20 #include "llvm/Support/Endian.h" 21 #include "llvm/Support/Error.h" 22 #include "llvm/Support/MemoryBufferRef.h" 23 #include <algorithm> 24 #include <cassert> 25 #include <climits> 26 #include <cstddef> 27 #include <cstdint> 28 #include <memory> 29 #include <optional> 30 #include <string> 31 #include <utility> 32 #include <vector> 33 34 namespace llvm { 35 36 /// This class maintains the abbreviations read from a block info block. 37 class BitstreamBlockInfo { 38 public: 39 /// This contains information emitted to BLOCKINFO_BLOCK blocks. These 40 /// describe abbreviations that all blocks of the specified ID inherit. 41 struct BlockInfo { 42 unsigned BlockID = 0; 43 std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs; 44 std::string Name; 45 std::vector<std::pair<unsigned, std::string>> RecordNames; 46 }; 47 48 private: 49 std::vector<BlockInfo> BlockInfoRecords; 50 51 public: 52 /// If there is block info for the specified ID, return it, otherwise return 53 /// null. getBlockInfo(unsigned BlockID)54 const BlockInfo *getBlockInfo(unsigned BlockID) const { 55 // Common case, the most recent entry matches BlockID. 56 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 57 return &BlockInfoRecords.back(); 58 59 for (const BlockInfo &BI : BlockInfoRecords) 60 if (BI.BlockID == BlockID) 61 return &BI; 62 return nullptr; 63 } 64 getOrCreateBlockInfo(unsigned BlockID)65 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 66 if (const BlockInfo *BI = getBlockInfo(BlockID)) 67 return *const_cast<BlockInfo*>(BI); 68 69 // Otherwise, add a new record. 70 BlockInfoRecords.emplace_back(); 71 BlockInfoRecords.back().BlockID = BlockID; 72 return BlockInfoRecords.back(); 73 } 74 }; 75 76 /// This represents a position within a bitstream. There may be multiple 77 /// independent cursors reading within one bitstream, each maintaining their 78 /// own local state. 79 class SimpleBitstreamCursor { 80 ArrayRef<uint8_t> BitcodeBytes; 81 size_t NextChar = 0; 82 83 public: 84 /// This is the current data we have pulled from the stream but have not 85 /// returned to the client. This is specifically and intentionally defined to 86 /// follow the word size of the host machine for efficiency. We use word_t in 87 /// places that are aware of this to make it perfectly explicit what is going 88 /// on. 89 using word_t = size_t; 90 91 private: 92 word_t CurWord = 0; 93 94 /// This is the number of bits in CurWord that are valid. This is always from 95 /// [0...bits_of(size_t)-1] inclusive. 96 unsigned BitsInCurWord = 0; 97 98 public: 99 SimpleBitstreamCursor() = default; SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)100 explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) 101 : BitcodeBytes(BitcodeBytes) {} SimpleBitstreamCursor(StringRef BitcodeBytes)102 explicit SimpleBitstreamCursor(StringRef BitcodeBytes) 103 : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {} SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)104 explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes) 105 : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {} 106 canSkipToPos(size_t pos)107 bool canSkipToPos(size_t pos) const { 108 // pos can be skipped to if it is a valid address or one byte past the end. 109 return pos <= BitcodeBytes.size(); 110 } 111 AtEndOfStream()112 bool AtEndOfStream() { 113 return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar; 114 } 115 116 /// Return the bit # of the bit we are reading. GetCurrentBitNo()117 uint64_t GetCurrentBitNo() const { 118 return NextChar*CHAR_BIT - BitsInCurWord; 119 } 120 121 // Return the byte # of the current bit. getCurrentByteNo()122 uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; } 123 getBitcodeBytes()124 ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; } 125 126 /// Reset the stream to the specified bit number. JumpToBit(uint64_t BitNo)127 Error JumpToBit(uint64_t BitNo) { 128 size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1); 129 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); 130 assert(canSkipToPos(ByteNo) && "Invalid location"); 131 132 // Move the cursor to the right word. 133 NextChar = ByteNo; 134 BitsInCurWord = 0; 135 136 // Skip over any bits that are already consumed. 137 if (WordBitNo) { 138 if (Expected<word_t> Res = Read(WordBitNo)) 139 return Error::success(); 140 else 141 return Res.takeError(); 142 } 143 144 return Error::success(); 145 } 146 147 /// Get a pointer into the bitstream at the specified byte offset. getPointerToByte(uint64_t ByteNo,uint64_t NumBytes)148 const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) { 149 return BitcodeBytes.data() + ByteNo; 150 } 151 152 /// Get a pointer into the bitstream at the specified bit offset. 153 /// 154 /// The bit offset must be on a byte boundary. getPointerToBit(uint64_t BitNo,uint64_t NumBytes)155 const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) { 156 assert(!(BitNo % 8) && "Expected bit on byte boundary"); 157 return getPointerToByte(BitNo / 8, NumBytes); 158 } 159 fillCurWord()160 Error fillCurWord() { 161 if (NextChar >= BitcodeBytes.size()) 162 return createStringError(std::errc::io_error, 163 "Unexpected end of file reading %u of %u bytes", 164 NextChar, BitcodeBytes.size()); 165 166 // Read the next word from the stream. 167 const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar; 168 unsigned BytesRead; 169 if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) { 170 BytesRead = sizeof(word_t); 171 CurWord = 172 support::endian::read<word_t, support::little, support::unaligned>( 173 NextCharPtr); 174 } else { 175 // Short read. 176 BytesRead = BitcodeBytes.size() - NextChar; 177 CurWord = 0; 178 for (unsigned B = 0; B != BytesRead; ++B) 179 CurWord |= uint64_t(NextCharPtr[B]) << (B * 8); 180 } 181 NextChar += BytesRead; 182 BitsInCurWord = BytesRead * 8; 183 return Error::success(); 184 } 185 Read(unsigned NumBits)186 Expected<word_t> Read(unsigned NumBits) { 187 static const unsigned BitsInWord = sizeof(word_t) * 8; 188 189 assert(NumBits && NumBits <= BitsInWord && 190 "Cannot return zero or more than BitsInWord bits!"); 191 192 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 193 194 // If the field is fully contained by CurWord, return it quickly. 195 if (BitsInCurWord >= NumBits) { 196 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 197 198 // Use a mask to avoid undefined behavior. 199 CurWord >>= (NumBits & Mask); 200 201 BitsInCurWord -= NumBits; 202 return R; 203 } 204 205 word_t R = BitsInCurWord ? CurWord : 0; 206 unsigned BitsLeft = NumBits - BitsInCurWord; 207 208 if (Error fillResult = fillCurWord()) 209 return std::move(fillResult); 210 211 // If we run out of data, abort. 212 if (BitsLeft > BitsInCurWord) 213 return createStringError(std::errc::io_error, 214 "Unexpected end of file reading %u of %u bits", 215 BitsInCurWord, BitsLeft); 216 217 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 218 219 // Use a mask to avoid undefined behavior. 220 CurWord >>= (BitsLeft & Mask); 221 222 BitsInCurWord -= BitsLeft; 223 224 R |= R2 << (NumBits - BitsLeft); 225 226 return R; 227 } 228 ReadVBR(const unsigned NumBits)229 Expected<uint32_t> ReadVBR(const unsigned NumBits) { 230 Expected<unsigned> MaybeRead = Read(NumBits); 231 if (!MaybeRead) 232 return MaybeRead; 233 uint32_t Piece = MaybeRead.get(); 234 235 assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value"); 236 const uint32_t MaskBitOrder = (NumBits - 1); 237 const uint32_t Mask = 1UL << MaskBitOrder; 238 239 if ((Piece & Mask) == 0) 240 return Piece; 241 242 uint32_t Result = 0; 243 unsigned NextBit = 0; 244 while (true) { 245 Result |= (Piece & (Mask - 1)) << NextBit; 246 247 if ((Piece & Mask) == 0) 248 return Result; 249 250 NextBit += NumBits-1; 251 if (NextBit >= 32) 252 return createStringError(std::errc::illegal_byte_sequence, 253 "Unterminated VBR"); 254 255 MaybeRead = Read(NumBits); 256 if (!MaybeRead) 257 return MaybeRead; 258 Piece = MaybeRead.get(); 259 } 260 } 261 262 // Read a VBR that may have a value up to 64-bits in size. The chunk size of 263 // the VBR must still be <= 32 bits though. ReadVBR64(const unsigned NumBits)264 Expected<uint64_t> ReadVBR64(const unsigned NumBits) { 265 Expected<uint64_t> MaybeRead = Read(NumBits); 266 if (!MaybeRead) 267 return MaybeRead; 268 uint32_t Piece = MaybeRead.get(); 269 assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value"); 270 const uint32_t MaskBitOrder = (NumBits - 1); 271 const uint32_t Mask = 1UL << MaskBitOrder; 272 273 if ((Piece & Mask) == 0) 274 return uint64_t(Piece); 275 276 uint64_t Result = 0; 277 unsigned NextBit = 0; 278 while (true) { 279 Result |= uint64_t(Piece & (Mask - 1)) << NextBit; 280 281 if ((Piece & Mask) == 0) 282 return Result; 283 284 NextBit += NumBits-1; 285 if (NextBit >= 64) 286 return createStringError(std::errc::illegal_byte_sequence, 287 "Unterminated VBR"); 288 289 MaybeRead = Read(NumBits); 290 if (!MaybeRead) 291 return MaybeRead; 292 Piece = MaybeRead.get(); 293 } 294 } 295 SkipToFourByteBoundary()296 void SkipToFourByteBoundary() { 297 // If word_t is 64-bits and if we've read less than 32 bits, just dump 298 // the bits we have up to the next 32-bit boundary. 299 if (sizeof(word_t) > 4 && 300 BitsInCurWord >= 32) { 301 CurWord >>= BitsInCurWord-32; 302 BitsInCurWord = 32; 303 return; 304 } 305 306 BitsInCurWord = 0; 307 } 308 309 /// Return the size of the stream in bytes. SizeInBytes()310 size_t SizeInBytes() const { return BitcodeBytes.size(); } 311 312 /// Skip to the end of the file. skipToEnd()313 void skipToEnd() { NextChar = BitcodeBytes.size(); } 314 315 /// Check whether a reservation of Size elements is plausible. isSizePlausible(size_t Size)316 bool isSizePlausible(size_t Size) const { 317 // Don't allow reserving more elements than the number of bits, assuming 318 // at least one bit is needed to encode an element. 319 return Size < BitcodeBytes.size() * 8; 320 } 321 }; 322 323 /// When advancing through a bitstream cursor, each advance can discover a few 324 /// different kinds of entries: 325 struct BitstreamEntry { 326 enum { 327 Error, // Malformed bitcode was found. 328 EndBlock, // We've reached the end of the current block, (or the end of the 329 // file, which is treated like a series of EndBlock records. 330 SubBlock, // This is the start of a new subblock of a specific ID. 331 Record // This is a record with a specific AbbrevID. 332 } Kind; 333 334 unsigned ID; 335 getErrorBitstreamEntry336 static BitstreamEntry getError() { 337 BitstreamEntry E; E.Kind = Error; return E; 338 } 339 getEndBlockBitstreamEntry340 static BitstreamEntry getEndBlock() { 341 BitstreamEntry E; E.Kind = EndBlock; return E; 342 } 343 getSubBlockBitstreamEntry344 static BitstreamEntry getSubBlock(unsigned ID) { 345 BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; 346 } 347 getRecordBitstreamEntry348 static BitstreamEntry getRecord(unsigned AbbrevID) { 349 BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; 350 } 351 }; 352 353 /// This represents a position within a bitcode file, implemented on top of a 354 /// SimpleBitstreamCursor. 355 /// 356 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not 357 /// be passed by value. 358 class BitstreamCursor : SimpleBitstreamCursor { 359 // This is the declared size of code values used for the current block, in 360 // bits. 361 unsigned CurCodeSize = 2; 362 363 /// Abbrevs installed at in this block. 364 std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs; 365 366 struct Block { 367 unsigned PrevCodeSize; 368 std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs; 369 BlockBlock370 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 371 }; 372 373 /// This tracks the codesize of parent blocks. 374 SmallVector<Block, 8> BlockScope; 375 376 BitstreamBlockInfo *BlockInfo = nullptr; 377 378 public: 379 static const size_t MaxChunkSize = 32; 380 381 BitstreamCursor() = default; BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)382 explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) 383 : SimpleBitstreamCursor(BitcodeBytes) {} BitstreamCursor(StringRef BitcodeBytes)384 explicit BitstreamCursor(StringRef BitcodeBytes) 385 : SimpleBitstreamCursor(BitcodeBytes) {} BitstreamCursor(MemoryBufferRef BitcodeBytes)386 explicit BitstreamCursor(MemoryBufferRef BitcodeBytes) 387 : SimpleBitstreamCursor(BitcodeBytes) {} 388 389 using SimpleBitstreamCursor::AtEndOfStream; 390 using SimpleBitstreamCursor::canSkipToPos; 391 using SimpleBitstreamCursor::fillCurWord; 392 using SimpleBitstreamCursor::getBitcodeBytes; 393 using SimpleBitstreamCursor::GetCurrentBitNo; 394 using SimpleBitstreamCursor::getCurrentByteNo; 395 using SimpleBitstreamCursor::getPointerToByte; 396 using SimpleBitstreamCursor::JumpToBit; 397 using SimpleBitstreamCursor::Read; 398 using SimpleBitstreamCursor::ReadVBR; 399 using SimpleBitstreamCursor::ReadVBR64; 400 using SimpleBitstreamCursor::SizeInBytes; 401 using SimpleBitstreamCursor::skipToEnd; 402 403 /// Return the number of bits used to encode an abbrev #. getAbbrevIDWidth()404 unsigned getAbbrevIDWidth() const { return CurCodeSize; } 405 406 /// Flags that modify the behavior of advance(). 407 enum { 408 /// If this flag is used, the advance() method does not automatically pop 409 /// the block scope when the end of a block is reached. 410 AF_DontPopBlockAtEnd = 1, 411 412 /// If this flag is used, abbrev entries are returned just like normal 413 /// records. 414 AF_DontAutoprocessAbbrevs = 2 415 }; 416 417 /// Advance the current bitstream, returning the next entry in the stream. 418 Expected<BitstreamEntry> advance(unsigned Flags = 0) { 419 while (true) { 420 if (AtEndOfStream()) 421 return BitstreamEntry::getError(); 422 423 Expected<unsigned> MaybeCode = ReadCode(); 424 if (!MaybeCode) 425 return MaybeCode.takeError(); 426 unsigned Code = MaybeCode.get(); 427 428 if (Code == bitc::END_BLOCK) { 429 // Pop the end of the block unless Flags tells us not to. 430 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 431 return BitstreamEntry::getError(); 432 return BitstreamEntry::getEndBlock(); 433 } 434 435 if (Code == bitc::ENTER_SUBBLOCK) { 436 if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID()) 437 return BitstreamEntry::getSubBlock(MaybeSubBlock.get()); 438 else 439 return MaybeSubBlock.takeError(); 440 } 441 442 if (Code == bitc::DEFINE_ABBREV && 443 !(Flags & AF_DontAutoprocessAbbrevs)) { 444 // We read and accumulate abbrev's, the client can't do anything with 445 // them anyway. 446 if (Error Err = ReadAbbrevRecord()) 447 return std::move(Err); 448 continue; 449 } 450 451 return BitstreamEntry::getRecord(Code); 452 } 453 } 454 455 /// This is a convenience function for clients that don't expect any 456 /// subblocks. This just skips over them automatically. 457 Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) { 458 while (true) { 459 // If we found a normal entry, return it. 460 Expected<BitstreamEntry> MaybeEntry = advance(Flags); 461 if (!MaybeEntry) 462 return MaybeEntry; 463 BitstreamEntry Entry = MaybeEntry.get(); 464 465 if (Entry.Kind != BitstreamEntry::SubBlock) 466 return Entry; 467 468 // If we found a sub-block, just skip over it and check the next entry. 469 if (Error Err = SkipBlock()) 470 return std::move(Err); 471 } 472 } 473 ReadCode()474 Expected<unsigned> ReadCode() { return Read(CurCodeSize); } 475 476 // Block header: 477 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 478 479 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. ReadSubBlockID()480 Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); } 481 482 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 483 /// of this block. SkipBlock()484 Error SkipBlock() { 485 // Read and ignore the codelen value. 486 if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth)) 487 ; // Since we are skipping this block, we don't care what code widths are 488 // used inside of it. 489 else 490 return Res.takeError(); 491 492 SkipToFourByteBoundary(); 493 Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth); 494 if (!MaybeNum) 495 return MaybeNum.takeError(); 496 size_t NumFourBytes = MaybeNum.get(); 497 498 // Check that the block wasn't partially defined, and that the offset isn't 499 // bogus. 500 size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8; 501 if (AtEndOfStream()) 502 return createStringError(std::errc::illegal_byte_sequence, 503 "can't skip block: already at end of stream"); 504 if (!canSkipToPos(SkipTo / 8)) 505 return createStringError(std::errc::illegal_byte_sequence, 506 "can't skip to bit %zu from %" PRIu64, SkipTo, 507 GetCurrentBitNo()); 508 509 if (Error Res = JumpToBit(SkipTo)) 510 return Res; 511 512 return Error::success(); 513 } 514 515 /// Having read the ENTER_SUBBLOCK abbrevid, and enter the block. 516 Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 517 ReadBlockEnd()518 bool ReadBlockEnd() { 519 if (BlockScope.empty()) return true; 520 521 // Block tail: 522 // [END_BLOCK, <align4bytes>] 523 SkipToFourByteBoundary(); 524 525 popBlockScope(); 526 return false; 527 } 528 529 private: popBlockScope()530 void popBlockScope() { 531 CurCodeSize = BlockScope.back().PrevCodeSize; 532 533 CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); 534 BlockScope.pop_back(); 535 } 536 537 //===--------------------------------------------------------------------===// 538 // Record Processing 539 //===--------------------------------------------------------------------===// 540 541 public: 542 /// Return the abbreviation for the specified AbbrevId. getAbbrev(unsigned AbbrevID)543 Expected<const BitCodeAbbrev *> getAbbrev(unsigned AbbrevID) { 544 unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; 545 if (AbbrevNo >= CurAbbrevs.size()) 546 return createStringError( 547 std::errc::illegal_byte_sequence, "Invalid abbrev number"); 548 return CurAbbrevs[AbbrevNo].get(); 549 } 550 551 /// Read the current record and discard it, returning the code for the record. 552 Expected<unsigned> skipRecord(unsigned AbbrevID); 553 554 Expected<unsigned> readRecord(unsigned AbbrevID, 555 SmallVectorImpl<uint64_t> &Vals, 556 StringRef *Blob = nullptr); 557 558 //===--------------------------------------------------------------------===// 559 // Abbrev Processing 560 //===--------------------------------------------------------------------===// 561 Error ReadAbbrevRecord(); 562 563 /// Read and return a block info block from the bitstream. If an error was 564 /// encountered, return std::nullopt. 565 /// 566 /// \param ReadBlockInfoNames Whether to read block/record name information in 567 /// the BlockInfo block. Only llvm-bcanalyzer uses this. 568 Expected<std::optional<BitstreamBlockInfo>> 569 ReadBlockInfoBlock(bool ReadBlockInfoNames = false); 570 571 /// Set the block info to be used by this BitstreamCursor to interpret 572 /// abbreviated records. setBlockInfo(BitstreamBlockInfo * BI)573 void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; } 574 }; 575 576 } // end llvm namespace 577 578 #endif // LLVM_BITSTREAM_BITSTREAMREADER_H 579