1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This header defines the BitstreamReader class. This class can be used to 11 // read an arbitrary bitstream, regardless of its contents. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H 16 #define LLVM_BITCODE_BITSTREAMREADER_H 17 18 #include "llvm/Bitcode/BitCodes.h" 19 #include "llvm/Support/Endian.h" 20 #include "llvm/Support/StreamingMemoryObject.h" 21 #include <climits> 22 #include <string> 23 #include <vector> 24 25 namespace llvm { 26 27 class Deserializer; 28 29 /// This class is used to read from an LLVM bitcode stream, maintaining 30 /// information that is global to decoding the entire file. While a file is 31 /// being read, multiple cursors can be independently advanced or skipped around 32 /// within the file. These are represented by the BitstreamCursor class. 33 class BitstreamReader { 34 public: 35 /// This contains information emitted to BLOCKINFO_BLOCK blocks. These 36 /// describe abbreviations that all blocks of the specified ID inherit. 37 struct BlockInfo { 38 unsigned BlockID; 39 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs; 40 std::string Name; 41 42 std::vector<std::pair<unsigned, std::string> > RecordNames; 43 }; 44 private: 45 std::unique_ptr<MemoryObject> BitcodeBytes; 46 47 std::vector<BlockInfo> BlockInfoRecords; 48 49 /// This is set to true if we don't care about the block/record name 50 /// information in the BlockInfo block. Only llvm-bcanalyzer uses this. 51 bool IgnoreBlockInfoNames; 52 53 BitstreamReader(const BitstreamReader&) LLVM_DELETED_FUNCTION; 54 void operator=(const BitstreamReader&) LLVM_DELETED_FUNCTION; 55 public: BitstreamReader()56 BitstreamReader() : IgnoreBlockInfoNames(true) { 57 } 58 BitstreamReader(const unsigned char * Start,const unsigned char * End)59 BitstreamReader(const unsigned char *Start, const unsigned char *End) 60 : IgnoreBlockInfoNames(true) { 61 init(Start, End); 62 } 63 BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes)64 BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes) 65 : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {} 66 BitstreamReader(BitstreamReader && Other)67 BitstreamReader(BitstreamReader &&Other) { 68 *this = std::move(Other); 69 } 70 71 BitstreamReader &operator=(BitstreamReader &&Other) { 72 BitcodeBytes = std::move(Other.BitcodeBytes); 73 // Explicitly swap block info, so that nothing gets destroyed twice. 74 std::swap(BlockInfoRecords, Other.BlockInfoRecords); 75 IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames; 76 return *this; 77 } 78 init(const unsigned char * Start,const unsigned char * End)79 void init(const unsigned char *Start, const unsigned char *End) { 80 assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); 81 BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); 82 } 83 getBitcodeBytes()84 MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } 85 86 /// This is called by clients that want block/record name information. CollectBlockInfoNames()87 void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } isIgnoringBlockInfoNames()88 bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } 89 90 //===--------------------------------------------------------------------===// 91 // Block Manipulation 92 //===--------------------------------------------------------------------===// 93 94 /// Return true if we've already read and processed the block info block for 95 /// this Bitstream. We only process it for the first cursor that walks over 96 /// it. hasBlockInfoRecords()97 bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } 98 99 /// If there is block info for the specified ID, return it, otherwise return 100 /// null. getBlockInfo(unsigned BlockID)101 const BlockInfo *getBlockInfo(unsigned BlockID) const { 102 // Common case, the most recent entry matches BlockID. 103 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 104 return &BlockInfoRecords.back(); 105 106 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 107 i != e; ++i) 108 if (BlockInfoRecords[i].BlockID == BlockID) 109 return &BlockInfoRecords[i]; 110 return nullptr; 111 } 112 getOrCreateBlockInfo(unsigned BlockID)113 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 114 if (const BlockInfo *BI = getBlockInfo(BlockID)) 115 return *const_cast<BlockInfo*>(BI); 116 117 // Otherwise, add a new record. 118 BlockInfoRecords.push_back(BlockInfo()); 119 BlockInfoRecords.back().BlockID = BlockID; 120 return BlockInfoRecords.back(); 121 } 122 123 /// Takes block info from the other bitstream reader. 124 /// 125 /// This is a "take" operation because BlockInfo records are non-trivial, and 126 /// indeed rather expensive. takeBlockInfo(BitstreamReader && Other)127 void takeBlockInfo(BitstreamReader &&Other) { 128 assert(!hasBlockInfoRecords()); 129 BlockInfoRecords = std::move(Other.BlockInfoRecords); 130 } 131 }; 132 133 /// When advancing through a bitstream cursor, each advance can discover a few 134 /// different kinds of entries: 135 struct BitstreamEntry { 136 enum { 137 Error, // Malformed bitcode was found. 138 EndBlock, // We've reached the end of the current block, (or the end of the 139 // file, which is treated like a series of EndBlock records. 140 SubBlock, // This is the start of a new subblock of a specific ID. 141 Record // This is a record with a specific AbbrevID. 142 } Kind; 143 144 unsigned ID; 145 getErrorBitstreamEntry146 static BitstreamEntry getError() { 147 BitstreamEntry E; E.Kind = Error; return E; 148 } getEndBlockBitstreamEntry149 static BitstreamEntry getEndBlock() { 150 BitstreamEntry E; E.Kind = EndBlock; return E; 151 } getSubBlockBitstreamEntry152 static BitstreamEntry getSubBlock(unsigned ID) { 153 BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; 154 } getRecordBitstreamEntry155 static BitstreamEntry getRecord(unsigned AbbrevID) { 156 BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; 157 } 158 }; 159 160 /// This represents a position within a bitcode file. There may be multiple 161 /// independent cursors reading within one bitstream, each maintaining their own 162 /// local state. 163 /// 164 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not 165 /// be passed by value. 166 class BitstreamCursor { 167 friend class Deserializer; 168 BitstreamReader *BitStream; 169 size_t NextChar; 170 171 // The size of the bicode. 0 if we don't know it yet. 172 size_t Size; 173 174 /// This is the current data we have pulled from the stream but have not 175 /// returned to the client. This is specifically and intentionally defined to 176 /// follow the word size of the host machine for efficiency. We use word_t in 177 /// places that are aware of this to make it perfectly explicit what is going 178 /// on. 179 typedef size_t word_t; 180 word_t CurWord; 181 182 /// This is the number of bits in CurWord that are valid. This is always from 183 /// [0...bits_of(size_t)-1] inclusive. 184 unsigned BitsInCurWord; 185 186 // This is the declared size of code values used for the current block, in 187 // bits. 188 unsigned CurCodeSize; 189 190 /// Abbrevs installed at in this block. 191 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs; 192 193 struct Block { 194 unsigned PrevCodeSize; 195 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs; BlockBlock196 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 197 }; 198 199 /// This tracks the codesize of parent blocks. 200 SmallVector<Block, 8> BlockScope; 201 202 203 public: BitstreamCursor()204 BitstreamCursor() { init(nullptr); } 205 BitstreamCursor(BitstreamReader & R)206 explicit BitstreamCursor(BitstreamReader &R) { init(&R); } 207 init(BitstreamReader * R)208 void init(BitstreamReader *R) { 209 freeState(); 210 211 BitStream = R; 212 NextChar = 0; 213 Size = 0; 214 BitsInCurWord = 0; 215 CurCodeSize = 2; 216 } 217 218 void freeState(); 219 canSkipToPos(size_t pos)220 bool canSkipToPos(size_t pos) const { 221 // pos can be skipped to if it is a valid address or one byte past the end. 222 return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( 223 static_cast<uint64_t>(pos - 1)); 224 } 225 AtEndOfStream()226 bool AtEndOfStream() { 227 if (BitsInCurWord != 0) 228 return false; 229 if (Size != 0) 230 return Size == NextChar; 231 fillCurWord(); 232 return BitsInCurWord == 0; 233 } 234 235 /// Return the number of bits used to encode an abbrev #. getAbbrevIDWidth()236 unsigned getAbbrevIDWidth() const { return CurCodeSize; } 237 238 /// Return the bit # of the bit we are reading. GetCurrentBitNo()239 uint64_t GetCurrentBitNo() const { 240 return NextChar*CHAR_BIT - BitsInCurWord; 241 } 242 getBitStreamReader()243 BitstreamReader *getBitStreamReader() { 244 return BitStream; 245 } getBitStreamReader()246 const BitstreamReader *getBitStreamReader() const { 247 return BitStream; 248 } 249 250 /// Flags that modify the behavior of advance(). 251 enum { 252 /// If this flag is used, the advance() method does not automatically pop 253 /// the block scope when the end of a block is reached. 254 AF_DontPopBlockAtEnd = 1, 255 256 /// If this flag is used, abbrev entries are returned just like normal 257 /// records. 258 AF_DontAutoprocessAbbrevs = 2 259 }; 260 261 /// Advance the current bitstream, returning the next entry in the stream. 262 BitstreamEntry advance(unsigned Flags = 0) { 263 while (1) { 264 unsigned Code = ReadCode(); 265 if (Code == bitc::END_BLOCK) { 266 // Pop the end of the block unless Flags tells us not to. 267 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 268 return BitstreamEntry::getError(); 269 return BitstreamEntry::getEndBlock(); 270 } 271 272 if (Code == bitc::ENTER_SUBBLOCK) 273 return BitstreamEntry::getSubBlock(ReadSubBlockID()); 274 275 if (Code == bitc::DEFINE_ABBREV && 276 !(Flags & AF_DontAutoprocessAbbrevs)) { 277 // We read and accumulate abbrev's, the client can't do anything with 278 // them anyway. 279 ReadAbbrevRecord(); 280 continue; 281 } 282 283 return BitstreamEntry::getRecord(Code); 284 } 285 } 286 287 /// This is a convenience function for clients that don't expect any 288 /// subblocks. This just skips over them automatically. 289 BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { 290 while (1) { 291 // If we found a normal entry, return it. 292 BitstreamEntry Entry = advance(Flags); 293 if (Entry.Kind != BitstreamEntry::SubBlock) 294 return Entry; 295 296 // If we found a sub-block, just skip over it and check the next entry. 297 if (SkipBlock()) 298 return BitstreamEntry::getError(); 299 } 300 } 301 302 /// Reset the stream to the specified bit number. JumpToBit(uint64_t BitNo)303 void JumpToBit(uint64_t BitNo) { 304 uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1); 305 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); 306 assert(canSkipToPos(ByteNo) && "Invalid location"); 307 308 // Move the cursor to the right word. 309 NextChar = ByteNo; 310 BitsInCurWord = 0; 311 312 // Skip over any bits that are already consumed. 313 if (WordBitNo) 314 Read(WordBitNo); 315 } 316 fillCurWord()317 void fillCurWord() { 318 assert(Size == 0 || NextChar < (unsigned)Size); 319 320 // Read the next word from the stream. 321 uint8_t Array[sizeof(word_t)] = {0}; 322 323 uint64_t BytesRead = 324 BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); 325 326 // If we run out of data, stop at the end of the stream. 327 if (BytesRead == 0) { 328 Size = NextChar; 329 return; 330 } 331 332 CurWord = 333 support::endian::read<word_t, support::little, support::unaligned>( 334 Array); 335 NextChar += BytesRead; 336 BitsInCurWord = BytesRead * 8; 337 } 338 Read(unsigned NumBits)339 word_t Read(unsigned NumBits) { 340 static const unsigned BitsInWord = sizeof(word_t) * 8; 341 342 assert(NumBits && NumBits <= BitsInWord && 343 "Cannot return zero or more than BitsInWord bits!"); 344 345 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 346 347 // If the field is fully contained by CurWord, return it quickly. 348 if (BitsInCurWord >= NumBits) { 349 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 350 351 // Use a mask to avoid undefined behavior. 352 CurWord >>= (NumBits & Mask); 353 354 BitsInCurWord -= NumBits; 355 return R; 356 } 357 358 word_t R = BitsInCurWord ? CurWord : 0; 359 unsigned BitsLeft = NumBits - BitsInCurWord; 360 361 fillCurWord(); 362 363 // If we run out of data, stop at the end of the stream. 364 if (BitsLeft > BitsInCurWord) 365 return 0; 366 367 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 368 369 // Use a mask to avoid undefined behavior. 370 CurWord >>= (BitsLeft & Mask); 371 372 BitsInCurWord -= BitsLeft; 373 374 R |= R2 << (NumBits - BitsLeft); 375 376 return R; 377 } 378 ReadVBR(unsigned NumBits)379 uint32_t ReadVBR(unsigned NumBits) { 380 uint32_t Piece = Read(NumBits); 381 if ((Piece & (1U << (NumBits-1))) == 0) 382 return Piece; 383 384 uint32_t Result = 0; 385 unsigned NextBit = 0; 386 while (1) { 387 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 388 389 if ((Piece & (1U << (NumBits-1))) == 0) 390 return Result; 391 392 NextBit += NumBits-1; 393 Piece = Read(NumBits); 394 } 395 } 396 397 // Read a VBR that may have a value up to 64-bits in size. The chunk size of 398 // the VBR must still be <= 32 bits though. ReadVBR64(unsigned NumBits)399 uint64_t ReadVBR64(unsigned NumBits) { 400 uint32_t Piece = Read(NumBits); 401 if ((Piece & (1U << (NumBits-1))) == 0) 402 return uint64_t(Piece); 403 404 uint64_t Result = 0; 405 unsigned NextBit = 0; 406 while (1) { 407 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; 408 409 if ((Piece & (1U << (NumBits-1))) == 0) 410 return Result; 411 412 NextBit += NumBits-1; 413 Piece = Read(NumBits); 414 } 415 } 416 417 private: SkipToFourByteBoundary()418 void SkipToFourByteBoundary() { 419 // If word_t is 64-bits and if we've read less than 32 bits, just dump 420 // the bits we have up to the next 32-bit boundary. 421 if (sizeof(word_t) > 4 && 422 BitsInCurWord >= 32) { 423 CurWord >>= BitsInCurWord-32; 424 BitsInCurWord = 32; 425 return; 426 } 427 428 BitsInCurWord = 0; 429 } 430 public: 431 ReadCode()432 unsigned ReadCode() { 433 return Read(CurCodeSize); 434 } 435 436 437 // Block header: 438 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 439 440 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. ReadSubBlockID()441 unsigned ReadSubBlockID() { 442 return ReadVBR(bitc::BlockIDWidth); 443 } 444 445 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 446 /// of this block. If the block record is malformed, return true. SkipBlock()447 bool SkipBlock() { 448 // Read and ignore the codelen value. Since we are skipping this block, we 449 // don't care what code widths are used inside of it. 450 ReadVBR(bitc::CodeLenWidth); 451 SkipToFourByteBoundary(); 452 unsigned NumFourBytes = Read(bitc::BlockSizeWidth); 453 454 // Check that the block wasn't partially defined, and that the offset isn't 455 // bogus. 456 size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; 457 if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) 458 return true; 459 460 JumpToBit(SkipTo); 461 return false; 462 } 463 464 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true 465 /// if the block has an error. 466 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 467 ReadBlockEnd()468 bool ReadBlockEnd() { 469 if (BlockScope.empty()) return true; 470 471 // Block tail: 472 // [END_BLOCK, <align4bytes>] 473 SkipToFourByteBoundary(); 474 475 popBlockScope(); 476 return false; 477 } 478 479 private: 480 popBlockScope()481 void popBlockScope() { 482 CurCodeSize = BlockScope.back().PrevCodeSize; 483 484 CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); 485 BlockScope.pop_back(); 486 } 487 488 //===--------------------------------------------------------------------===// 489 // Record Processing 490 //===--------------------------------------------------------------------===// 491 492 public: 493 494 /// Return the abbreviation for the specified AbbrevId. getAbbrev(unsigned AbbrevID)495 const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { 496 unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV; 497 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 498 return CurAbbrevs[AbbrevNo].get(); 499 } 500 501 /// Read the current record and discard it. 502 void skipRecord(unsigned AbbrevID); 503 504 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, 505 StringRef *Blob = nullptr); 506 507 //===--------------------------------------------------------------------===// 508 // Abbrev Processing 509 //===--------------------------------------------------------------------===// 510 void ReadAbbrevRecord(); 511 512 bool ReadBlockInfoBlock(); 513 }; 514 515 } // End llvm namespace 516 517 #endif 518