1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header defines the BitstreamReader class. This class can be used to 10 // read an arbitrary bitstream, regardless of its contents. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_BITSTREAM_BITSTREAMREADER_H 15 #define LLVM_BITSTREAM_BITSTREAMREADER_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/Bitstream/BitCodes.h" 20 #include "llvm/Support/Endian.h" 21 #include "llvm/Support/Error.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include <algorithm> 26 #include <cassert> 27 #include <climits> 28 #include <cstddef> 29 #include <cstdint> 30 #include <memory> 31 #include <string> 32 #include <utility> 33 #include <vector> 34 35 namespace llvm { 36 37 /// This class maintains the abbreviations read from a block info block. 38 class BitstreamBlockInfo { 39 public: 40 /// This contains information emitted to BLOCKINFO_BLOCK blocks. These 41 /// describe abbreviations that all blocks of the specified ID inherit. 42 struct BlockInfo { 43 unsigned BlockID = 0; 44 std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs; 45 std::string Name; 46 std::vector<std::pair<unsigned, std::string>> RecordNames; 47 }; 48 49 private: 50 std::vector<BlockInfo> BlockInfoRecords; 51 52 public: 53 /// If there is block info for the specified ID, return it, otherwise return 54 /// null. getBlockInfo(unsigned BlockID)55 const BlockInfo *getBlockInfo(unsigned BlockID) const { 56 // Common case, the most recent entry matches BlockID. 57 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 58 return &BlockInfoRecords.back(); 59 60 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 61 i != e; ++i) 62 if (BlockInfoRecords[i].BlockID == BlockID) 63 return &BlockInfoRecords[i]; 64 return nullptr; 65 } 66 getOrCreateBlockInfo(unsigned BlockID)67 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 68 if (const BlockInfo *BI = getBlockInfo(BlockID)) 69 return *const_cast<BlockInfo*>(BI); 70 71 // Otherwise, add a new record. 72 BlockInfoRecords.emplace_back(); 73 BlockInfoRecords.back().BlockID = BlockID; 74 return BlockInfoRecords.back(); 75 } 76 }; 77 78 /// This represents a position within a bitstream. There may be multiple 79 /// independent cursors reading within one bitstream, each maintaining their 80 /// own local state. 81 class SimpleBitstreamCursor { 82 ArrayRef<uint8_t> BitcodeBytes; 83 size_t NextChar = 0; 84 85 public: 86 /// This is the current data we have pulled from the stream but have not 87 /// returned to the client. This is specifically and intentionally defined to 88 /// follow the word size of the host machine for efficiency. We use word_t in 89 /// places that are aware of this to make it perfectly explicit what is going 90 /// on. 91 using word_t = size_t; 92 93 private: 94 word_t CurWord = 0; 95 96 /// This is the number of bits in CurWord that are valid. This is always from 97 /// [0...bits_of(size_t)-1] inclusive. 98 unsigned BitsInCurWord = 0; 99 100 public: 101 static const constexpr size_t MaxChunkSize = sizeof(word_t) * 8; 102 103 SimpleBitstreamCursor() = default; SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)104 explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) 105 : BitcodeBytes(BitcodeBytes) {} SimpleBitstreamCursor(StringRef BitcodeBytes)106 explicit SimpleBitstreamCursor(StringRef BitcodeBytes) 107 : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {} SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)108 explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes) 109 : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {} 110 canSkipToPos(size_t pos)111 bool canSkipToPos(size_t pos) const { 112 // pos can be skipped to if it is a valid address or one byte past the end. 113 return pos <= BitcodeBytes.size(); 114 } 115 AtEndOfStream()116 bool AtEndOfStream() { 117 return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar; 118 } 119 120 /// Return the bit # of the bit we are reading. GetCurrentBitNo()121 uint64_t GetCurrentBitNo() const { 122 return NextChar*CHAR_BIT - BitsInCurWord; 123 } 124 125 // Return the byte # of the current bit. getCurrentByteNo()126 uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; } 127 getBitcodeBytes()128 ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; } 129 130 /// Reset the stream to the specified bit number. JumpToBit(uint64_t BitNo)131 Error JumpToBit(uint64_t BitNo) { 132 size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1); 133 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); 134 assert(canSkipToPos(ByteNo) && "Invalid location"); 135 136 // Move the cursor to the right word. 137 NextChar = ByteNo; 138 BitsInCurWord = 0; 139 140 // Skip over any bits that are already consumed. 141 if (WordBitNo) { 142 if (Expected<word_t> Res = Read(WordBitNo)) 143 return Error::success(); 144 else 145 return Res.takeError(); 146 } 147 148 return Error::success(); 149 } 150 151 /// Get a pointer into the bitstream at the specified byte offset. getPointerToByte(uint64_t ByteNo,uint64_t NumBytes)152 const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) { 153 return BitcodeBytes.data() + ByteNo; 154 } 155 156 /// Get a pointer into the bitstream at the specified bit offset. 157 /// 158 /// The bit offset must be on a byte boundary. getPointerToBit(uint64_t BitNo,uint64_t NumBytes)159 const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) { 160 assert(!(BitNo % 8) && "Expected bit on byte boundary"); 161 return getPointerToByte(BitNo / 8, NumBytes); 162 } 163 fillCurWord()164 Error fillCurWord() { 165 if (NextChar >= BitcodeBytes.size()) 166 return createStringError(std::errc::io_error, 167 "Unexpected end of file reading %u of %u bytes", 168 NextChar, BitcodeBytes.size()); 169 170 // Read the next word from the stream. 171 const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar; 172 unsigned BytesRead; 173 if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) { 174 BytesRead = sizeof(word_t); 175 CurWord = 176 support::endian::read<word_t, support::little, support::unaligned>( 177 NextCharPtr); 178 } else { 179 // Short read. 180 BytesRead = BitcodeBytes.size() - NextChar; 181 CurWord = 0; 182 for (unsigned B = 0; B != BytesRead; ++B) 183 CurWord |= uint64_t(NextCharPtr[B]) << (B * 8); 184 } 185 NextChar += BytesRead; 186 BitsInCurWord = BytesRead * 8; 187 return Error::success(); 188 } 189 Read(unsigned NumBits)190 Expected<word_t> Read(unsigned NumBits) { 191 static const unsigned BitsInWord = MaxChunkSize; 192 193 assert(NumBits && NumBits <= BitsInWord && 194 "Cannot return zero or more than BitsInWord bits!"); 195 196 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 197 198 // If the field is fully contained by CurWord, return it quickly. 199 if (BitsInCurWord >= NumBits) { 200 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 201 202 // Use a mask to avoid undefined behavior. 203 CurWord >>= (NumBits & Mask); 204 205 BitsInCurWord -= NumBits; 206 return R; 207 } 208 209 word_t R = BitsInCurWord ? CurWord : 0; 210 unsigned BitsLeft = NumBits - BitsInCurWord; 211 212 if (Error fillResult = fillCurWord()) 213 return std::move(fillResult); 214 215 // If we run out of data, abort. 216 if (BitsLeft > BitsInCurWord) 217 return createStringError(std::errc::io_error, 218 "Unexpected end of file reading %u of %u bits", 219 BitsInCurWord, BitsLeft); 220 221 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 222 223 // Use a mask to avoid undefined behavior. 224 CurWord >>= (BitsLeft & Mask); 225 226 BitsInCurWord -= BitsLeft; 227 228 R |= R2 << (NumBits - BitsLeft); 229 230 return R; 231 } 232 ReadVBR(unsigned NumBits)233 Expected<uint32_t> ReadVBR(unsigned NumBits) { 234 Expected<unsigned> MaybeRead = Read(NumBits); 235 if (!MaybeRead) 236 return MaybeRead; 237 uint32_t Piece = MaybeRead.get(); 238 239 if ((Piece & (1U << (NumBits-1))) == 0) 240 return Piece; 241 242 uint32_t Result = 0; 243 unsigned NextBit = 0; 244 while (true) { 245 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 246 247 if ((Piece & (1U << (NumBits-1))) == 0) 248 return Result; 249 250 NextBit += NumBits-1; 251 MaybeRead = Read(NumBits); 252 if (!MaybeRead) 253 return MaybeRead; 254 Piece = MaybeRead.get(); 255 } 256 } 257 258 // Read a VBR that may have a value up to 64-bits in size. The chunk size of 259 // the VBR must still be <= 32 bits though. ReadVBR64(unsigned NumBits)260 Expected<uint64_t> ReadVBR64(unsigned NumBits) { 261 Expected<uint64_t> MaybeRead = Read(NumBits); 262 if (!MaybeRead) 263 return MaybeRead; 264 uint32_t Piece = MaybeRead.get(); 265 266 if ((Piece & (1U << (NumBits-1))) == 0) 267 return uint64_t(Piece); 268 269 uint64_t Result = 0; 270 unsigned NextBit = 0; 271 while (true) { 272 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; 273 274 if ((Piece & (1U << (NumBits-1))) == 0) 275 return Result; 276 277 NextBit += NumBits-1; 278 MaybeRead = Read(NumBits); 279 if (!MaybeRead) 280 return MaybeRead; 281 Piece = MaybeRead.get(); 282 } 283 } 284 SkipToFourByteBoundary()285 void SkipToFourByteBoundary() { 286 // If word_t is 64-bits and if we've read less than 32 bits, just dump 287 // the bits we have up to the next 32-bit boundary. 288 if (sizeof(word_t) > 4 && 289 BitsInCurWord >= 32) { 290 CurWord >>= BitsInCurWord-32; 291 BitsInCurWord = 32; 292 return; 293 } 294 295 BitsInCurWord = 0; 296 } 297 298 /// Return the size of the stream in bytes. SizeInBytes()299 size_t SizeInBytes() const { return BitcodeBytes.size(); } 300 301 /// Skip to the end of the file. skipToEnd()302 void skipToEnd() { NextChar = BitcodeBytes.size(); } 303 }; 304 305 /// When advancing through a bitstream cursor, each advance can discover a few 306 /// different kinds of entries: 307 struct BitstreamEntry { 308 enum { 309 Error, // Malformed bitcode was found. 310 EndBlock, // We've reached the end of the current block, (or the end of the 311 // file, which is treated like a series of EndBlock records. 312 SubBlock, // This is the start of a new subblock of a specific ID. 313 Record // This is a record with a specific AbbrevID. 314 } Kind; 315 316 unsigned ID; 317 getErrorBitstreamEntry318 static BitstreamEntry getError() { 319 BitstreamEntry E; E.Kind = Error; return E; 320 } 321 getEndBlockBitstreamEntry322 static BitstreamEntry getEndBlock() { 323 BitstreamEntry E; E.Kind = EndBlock; return E; 324 } 325 getSubBlockBitstreamEntry326 static BitstreamEntry getSubBlock(unsigned ID) { 327 BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; 328 } 329 getRecordBitstreamEntry330 static BitstreamEntry getRecord(unsigned AbbrevID) { 331 BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; 332 } 333 }; 334 335 /// This represents a position within a bitcode file, implemented on top of a 336 /// SimpleBitstreamCursor. 337 /// 338 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not 339 /// be passed by value. 340 class BitstreamCursor : SimpleBitstreamCursor { 341 // This is the declared size of code values used for the current block, in 342 // bits. 343 unsigned CurCodeSize = 2; 344 345 /// Abbrevs installed at in this block. 346 std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs; 347 348 struct Block { 349 unsigned PrevCodeSize; 350 std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs; 351 BlockBlock352 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 353 }; 354 355 /// This tracks the codesize of parent blocks. 356 SmallVector<Block, 8> BlockScope; 357 358 BitstreamBlockInfo *BlockInfo = nullptr; 359 360 public: 361 static const size_t MaxChunkSize = sizeof(word_t) * 8; 362 363 BitstreamCursor() = default; BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)364 explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) 365 : SimpleBitstreamCursor(BitcodeBytes) {} BitstreamCursor(StringRef BitcodeBytes)366 explicit BitstreamCursor(StringRef BitcodeBytes) 367 : SimpleBitstreamCursor(BitcodeBytes) {} BitstreamCursor(MemoryBufferRef BitcodeBytes)368 explicit BitstreamCursor(MemoryBufferRef BitcodeBytes) 369 : SimpleBitstreamCursor(BitcodeBytes) {} 370 371 using SimpleBitstreamCursor::AtEndOfStream; 372 using SimpleBitstreamCursor::canSkipToPos; 373 using SimpleBitstreamCursor::fillCurWord; 374 using SimpleBitstreamCursor::getBitcodeBytes; 375 using SimpleBitstreamCursor::GetCurrentBitNo; 376 using SimpleBitstreamCursor::getCurrentByteNo; 377 using SimpleBitstreamCursor::getPointerToByte; 378 using SimpleBitstreamCursor::JumpToBit; 379 using SimpleBitstreamCursor::Read; 380 using SimpleBitstreamCursor::ReadVBR; 381 using SimpleBitstreamCursor::ReadVBR64; 382 using SimpleBitstreamCursor::SizeInBytes; 383 using SimpleBitstreamCursor::skipToEnd; 384 385 /// Return the number of bits used to encode an abbrev #. getAbbrevIDWidth()386 unsigned getAbbrevIDWidth() const { return CurCodeSize; } 387 388 /// Flags that modify the behavior of advance(). 389 enum { 390 /// If this flag is used, the advance() method does not automatically pop 391 /// the block scope when the end of a block is reached. 392 AF_DontPopBlockAtEnd = 1, 393 394 /// If this flag is used, abbrev entries are returned just like normal 395 /// records. 396 AF_DontAutoprocessAbbrevs = 2 397 }; 398 399 /// Advance the current bitstream, returning the next entry in the stream. 400 Expected<BitstreamEntry> advance(unsigned Flags = 0) { 401 while (true) { 402 if (AtEndOfStream()) 403 return BitstreamEntry::getError(); 404 405 Expected<unsigned> MaybeCode = ReadCode(); 406 if (!MaybeCode) 407 return MaybeCode.takeError(); 408 unsigned Code = MaybeCode.get(); 409 410 if (Code == bitc::END_BLOCK) { 411 // Pop the end of the block unless Flags tells us not to. 412 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 413 return BitstreamEntry::getError(); 414 return BitstreamEntry::getEndBlock(); 415 } 416 417 if (Code == bitc::ENTER_SUBBLOCK) { 418 if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID()) 419 return BitstreamEntry::getSubBlock(MaybeSubBlock.get()); 420 else 421 return MaybeSubBlock.takeError(); 422 } 423 424 if (Code == bitc::DEFINE_ABBREV && 425 !(Flags & AF_DontAutoprocessAbbrevs)) { 426 // We read and accumulate abbrev's, the client can't do anything with 427 // them anyway. 428 if (Error Err = ReadAbbrevRecord()) 429 return std::move(Err); 430 continue; 431 } 432 433 return BitstreamEntry::getRecord(Code); 434 } 435 } 436 437 /// This is a convenience function for clients that don't expect any 438 /// subblocks. This just skips over them automatically. 439 Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) { 440 while (true) { 441 // If we found a normal entry, return it. 442 Expected<BitstreamEntry> MaybeEntry = advance(Flags); 443 if (!MaybeEntry) 444 return MaybeEntry; 445 BitstreamEntry Entry = MaybeEntry.get(); 446 447 if (Entry.Kind != BitstreamEntry::SubBlock) 448 return Entry; 449 450 // If we found a sub-block, just skip over it and check the next entry. 451 if (Error Err = SkipBlock()) 452 return std::move(Err); 453 } 454 } 455 ReadCode()456 Expected<unsigned> ReadCode() { return Read(CurCodeSize); } 457 458 // Block header: 459 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 460 461 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. ReadSubBlockID()462 Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); } 463 464 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 465 /// of this block. SkipBlock()466 Error SkipBlock() { 467 // Read and ignore the codelen value. 468 if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth)) 469 ; // Since we are skipping this block, we don't care what code widths are 470 // used inside of it. 471 else 472 return Res.takeError(); 473 474 SkipToFourByteBoundary(); 475 Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth); 476 if (!MaybeNum) 477 return MaybeNum.takeError(); 478 size_t NumFourBytes = MaybeNum.get(); 479 480 // Check that the block wasn't partially defined, and that the offset isn't 481 // bogus. 482 size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8; 483 if (AtEndOfStream()) 484 return createStringError(std::errc::illegal_byte_sequence, 485 "can't skip block: already at end of stream"); 486 if (!canSkipToPos(SkipTo / 8)) 487 return createStringError(std::errc::illegal_byte_sequence, 488 "can't skip to bit %zu from %" PRIu64, SkipTo, 489 GetCurrentBitNo()); 490 491 if (Error Res = JumpToBit(SkipTo)) 492 return Res; 493 494 return Error::success(); 495 } 496 497 /// Having read the ENTER_SUBBLOCK abbrevid, and enter the block. 498 Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 499 ReadBlockEnd()500 bool ReadBlockEnd() { 501 if (BlockScope.empty()) return true; 502 503 // Block tail: 504 // [END_BLOCK, <align4bytes>] 505 SkipToFourByteBoundary(); 506 507 popBlockScope(); 508 return false; 509 } 510 511 private: popBlockScope()512 void popBlockScope() { 513 CurCodeSize = BlockScope.back().PrevCodeSize; 514 515 CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); 516 BlockScope.pop_back(); 517 } 518 519 //===--------------------------------------------------------------------===// 520 // Record Processing 521 //===--------------------------------------------------------------------===// 522 523 public: 524 /// Return the abbreviation for the specified AbbrevId. getAbbrev(unsigned AbbrevID)525 const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { 526 unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; 527 if (AbbrevNo >= CurAbbrevs.size()) 528 report_fatal_error("Invalid abbrev number"); 529 return CurAbbrevs[AbbrevNo].get(); 530 } 531 532 /// Read the current record and discard it, returning the code for the record. 533 Expected<unsigned> skipRecord(unsigned AbbrevID); 534 535 Expected<unsigned> readRecord(unsigned AbbrevID, 536 SmallVectorImpl<uint64_t> &Vals, 537 StringRef *Blob = nullptr); 538 539 //===--------------------------------------------------------------------===// 540 // Abbrev Processing 541 //===--------------------------------------------------------------------===// 542 Error ReadAbbrevRecord(); 543 544 /// Read and return a block info block from the bitstream. If an error was 545 /// encountered, return None. 546 /// 547 /// \param ReadBlockInfoNames Whether to read block/record name information in 548 /// the BlockInfo block. Only llvm-bcanalyzer uses this. 549 Expected<Optional<BitstreamBlockInfo>> 550 ReadBlockInfoBlock(bool ReadBlockInfoNames = false); 551 552 /// Set the block info to be used by this BitstreamCursor to interpret 553 /// abbreviated records. setBlockInfo(BitstreamBlockInfo * BI)554 void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; } 555 }; 556 557 } // end llvm namespace 558 559 #endif // LLVM_BITSTREAM_BITSTREAMREADER_H 560