1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines the BitstreamReader class.  This class can be used to
10 // read an arbitrary bitstream, regardless of its contents.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_BITSTREAM_BITSTREAMREADER_H
15 #define LLVM_BITSTREAM_BITSTREAMREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/Bitstream/BitCodes.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/Error.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include <algorithm>
26 #include <cassert>
27 #include <climits>
28 #include <cstddef>
29 #include <cstdint>
30 #include <memory>
31 #include <string>
32 #include <utility>
33 #include <vector>
34 
35 namespace llvm {
36 
37 /// This class maintains the abbreviations read from a block info block.
38 class BitstreamBlockInfo {
39 public:
40   /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
41   /// describe abbreviations that all blocks of the specified ID inherit.
42   struct BlockInfo {
43     unsigned BlockID = 0;
44     std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
45     std::string Name;
46     std::vector<std::pair<unsigned, std::string>> RecordNames;
47   };
48 
49 private:
50   std::vector<BlockInfo> BlockInfoRecords;
51 
52 public:
53   /// If there is block info for the specified ID, return it, otherwise return
54   /// null.
getBlockInfo(unsigned BlockID)55   const BlockInfo *getBlockInfo(unsigned BlockID) const {
56     // Common case, the most recent entry matches BlockID.
57     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
58       return &BlockInfoRecords.back();
59 
60     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
61          i != e; ++i)
62       if (BlockInfoRecords[i].BlockID == BlockID)
63         return &BlockInfoRecords[i];
64     return nullptr;
65   }
66 
getOrCreateBlockInfo(unsigned BlockID)67   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
68     if (const BlockInfo *BI = getBlockInfo(BlockID))
69       return *const_cast<BlockInfo*>(BI);
70 
71     // Otherwise, add a new record.
72     BlockInfoRecords.emplace_back();
73     BlockInfoRecords.back().BlockID = BlockID;
74     return BlockInfoRecords.back();
75   }
76 };
77 
78 /// This represents a position within a bitstream. There may be multiple
79 /// independent cursors reading within one bitstream, each maintaining their
80 /// own local state.
81 class SimpleBitstreamCursor {
82   ArrayRef<uint8_t> BitcodeBytes;
83   size_t NextChar = 0;
84 
85 public:
86   /// This is the current data we have pulled from the stream but have not
87   /// returned to the client. This is specifically and intentionally defined to
88   /// follow the word size of the host machine for efficiency. We use word_t in
89   /// places that are aware of this to make it perfectly explicit what is going
90   /// on.
91   using word_t = size_t;
92 
93 private:
94   word_t CurWord = 0;
95 
96   /// This is the number of bits in CurWord that are valid. This is always from
97   /// [0...bits_of(size_t)-1] inclusive.
98   unsigned BitsInCurWord = 0;
99 
100 public:
101   static const constexpr size_t MaxChunkSize = sizeof(word_t) * 8;
102 
103   SimpleBitstreamCursor() = default;
SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)104   explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
105       : BitcodeBytes(BitcodeBytes) {}
SimpleBitstreamCursor(StringRef BitcodeBytes)106   explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
107       : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {}
SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)108   explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
109       : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
110 
canSkipToPos(size_t pos)111   bool canSkipToPos(size_t pos) const {
112     // pos can be skipped to if it is a valid address or one byte past the end.
113     return pos <= BitcodeBytes.size();
114   }
115 
AtEndOfStream()116   bool AtEndOfStream() {
117     return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
118   }
119 
120   /// Return the bit # of the bit we are reading.
GetCurrentBitNo()121   uint64_t GetCurrentBitNo() const {
122     return NextChar*CHAR_BIT - BitsInCurWord;
123   }
124 
125   // Return the byte # of the current bit.
getCurrentByteNo()126   uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
127 
getBitcodeBytes()128   ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
129 
130   /// Reset the stream to the specified bit number.
JumpToBit(uint64_t BitNo)131   Error JumpToBit(uint64_t BitNo) {
132     size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
133     unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
134     assert(canSkipToPos(ByteNo) && "Invalid location");
135 
136     // Move the cursor to the right word.
137     NextChar = ByteNo;
138     BitsInCurWord = 0;
139 
140     // Skip over any bits that are already consumed.
141     if (WordBitNo) {
142       if (Expected<word_t> Res = Read(WordBitNo))
143         return Error::success();
144       else
145         return Res.takeError();
146     }
147 
148     return Error::success();
149   }
150 
151   /// Get a pointer into the bitstream at the specified byte offset.
getPointerToByte(uint64_t ByteNo,uint64_t NumBytes)152   const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
153     return BitcodeBytes.data() + ByteNo;
154   }
155 
156   /// Get a pointer into the bitstream at the specified bit offset.
157   ///
158   /// The bit offset must be on a byte boundary.
getPointerToBit(uint64_t BitNo,uint64_t NumBytes)159   const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
160     assert(!(BitNo % 8) && "Expected bit on byte boundary");
161     return getPointerToByte(BitNo / 8, NumBytes);
162   }
163 
fillCurWord()164   Error fillCurWord() {
165     if (NextChar >= BitcodeBytes.size())
166       return createStringError(std::errc::io_error,
167                                "Unexpected end of file reading %u of %u bytes",
168                                NextChar, BitcodeBytes.size());
169 
170     // Read the next word from the stream.
171     const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
172     unsigned BytesRead;
173     if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
174       BytesRead = sizeof(word_t);
175       CurWord =
176           support::endian::read<word_t, support::little, support::unaligned>(
177               NextCharPtr);
178     } else {
179       // Short read.
180       BytesRead = BitcodeBytes.size() - NextChar;
181       CurWord = 0;
182       for (unsigned B = 0; B != BytesRead; ++B)
183         CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
184     }
185     NextChar += BytesRead;
186     BitsInCurWord = BytesRead * 8;
187     return Error::success();
188   }
189 
Read(unsigned NumBits)190   Expected<word_t> Read(unsigned NumBits) {
191     static const unsigned BitsInWord = MaxChunkSize;
192 
193     assert(NumBits && NumBits <= BitsInWord &&
194            "Cannot return zero or more than BitsInWord bits!");
195 
196     static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
197 
198     // If the field is fully contained by CurWord, return it quickly.
199     if (BitsInCurWord >= NumBits) {
200       word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
201 
202       // Use a mask to avoid undefined behavior.
203       CurWord >>= (NumBits & Mask);
204 
205       BitsInCurWord -= NumBits;
206       return R;
207     }
208 
209     word_t R = BitsInCurWord ? CurWord : 0;
210     unsigned BitsLeft = NumBits - BitsInCurWord;
211 
212     if (Error fillResult = fillCurWord())
213       return std::move(fillResult);
214 
215     // If we run out of data, abort.
216     if (BitsLeft > BitsInCurWord)
217       return createStringError(std::errc::io_error,
218                                "Unexpected end of file reading %u of %u bits",
219                                BitsInCurWord, BitsLeft);
220 
221     word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
222 
223     // Use a mask to avoid undefined behavior.
224     CurWord >>= (BitsLeft & Mask);
225 
226     BitsInCurWord -= BitsLeft;
227 
228     R |= R2 << (NumBits - BitsLeft);
229 
230     return R;
231   }
232 
ReadVBR(unsigned NumBits)233   Expected<uint32_t> ReadVBR(unsigned NumBits) {
234     Expected<unsigned> MaybeRead = Read(NumBits);
235     if (!MaybeRead)
236       return MaybeRead;
237     uint32_t Piece = MaybeRead.get();
238 
239     if ((Piece & (1U << (NumBits-1))) == 0)
240       return Piece;
241 
242     uint32_t Result = 0;
243     unsigned NextBit = 0;
244     while (true) {
245       Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
246 
247       if ((Piece & (1U << (NumBits-1))) == 0)
248         return Result;
249 
250       NextBit += NumBits-1;
251       MaybeRead = Read(NumBits);
252       if (!MaybeRead)
253         return MaybeRead;
254       Piece = MaybeRead.get();
255     }
256   }
257 
258   // Read a VBR that may have a value up to 64-bits in size. The chunk size of
259   // the VBR must still be <= 32 bits though.
ReadVBR64(unsigned NumBits)260   Expected<uint64_t> ReadVBR64(unsigned NumBits) {
261     Expected<uint64_t> MaybeRead = Read(NumBits);
262     if (!MaybeRead)
263       return MaybeRead;
264     uint32_t Piece = MaybeRead.get();
265 
266     if ((Piece & (1U << (NumBits-1))) == 0)
267       return uint64_t(Piece);
268 
269     uint64_t Result = 0;
270     unsigned NextBit = 0;
271     while (true) {
272       Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
273 
274       if ((Piece & (1U << (NumBits-1))) == 0)
275         return Result;
276 
277       NextBit += NumBits-1;
278       MaybeRead = Read(NumBits);
279       if (!MaybeRead)
280         return MaybeRead;
281       Piece = MaybeRead.get();
282     }
283   }
284 
SkipToFourByteBoundary()285   void SkipToFourByteBoundary() {
286     // If word_t is 64-bits and if we've read less than 32 bits, just dump
287     // the bits we have up to the next 32-bit boundary.
288     if (sizeof(word_t) > 4 &&
289         BitsInCurWord >= 32) {
290       CurWord >>= BitsInCurWord-32;
291       BitsInCurWord = 32;
292       return;
293     }
294 
295     BitsInCurWord = 0;
296   }
297 
298   /// Return the size of the stream in bytes.
SizeInBytes()299   size_t SizeInBytes() const { return BitcodeBytes.size(); }
300 
301   /// Skip to the end of the file.
skipToEnd()302   void skipToEnd() { NextChar = BitcodeBytes.size(); }
303 };
304 
305 /// When advancing through a bitstream cursor, each advance can discover a few
306 /// different kinds of entries:
307 struct BitstreamEntry {
308   enum {
309     Error,    // Malformed bitcode was found.
310     EndBlock, // We've reached the end of the current block, (or the end of the
311               // file, which is treated like a series of EndBlock records.
312     SubBlock, // This is the start of a new subblock of a specific ID.
313     Record    // This is a record with a specific AbbrevID.
314   } Kind;
315 
316   unsigned ID;
317 
getErrorBitstreamEntry318   static BitstreamEntry getError() {
319     BitstreamEntry E; E.Kind = Error; return E;
320   }
321 
getEndBlockBitstreamEntry322   static BitstreamEntry getEndBlock() {
323     BitstreamEntry E; E.Kind = EndBlock; return E;
324   }
325 
getSubBlockBitstreamEntry326   static BitstreamEntry getSubBlock(unsigned ID) {
327     BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
328   }
329 
getRecordBitstreamEntry330   static BitstreamEntry getRecord(unsigned AbbrevID) {
331     BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
332   }
333 };
334 
335 /// This represents a position within a bitcode file, implemented on top of a
336 /// SimpleBitstreamCursor.
337 ///
338 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
339 /// be passed by value.
340 class BitstreamCursor : SimpleBitstreamCursor {
341   // This is the declared size of code values used for the current block, in
342   // bits.
343   unsigned CurCodeSize = 2;
344 
345   /// Abbrevs installed at in this block.
346   std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
347 
348   struct Block {
349     unsigned PrevCodeSize;
350     std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
351 
BlockBlock352     explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
353   };
354 
355   /// This tracks the codesize of parent blocks.
356   SmallVector<Block, 8> BlockScope;
357 
358   BitstreamBlockInfo *BlockInfo = nullptr;
359 
360 public:
361   static const size_t MaxChunkSize = sizeof(word_t) * 8;
362 
363   BitstreamCursor() = default;
BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)364   explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
365       : SimpleBitstreamCursor(BitcodeBytes) {}
BitstreamCursor(StringRef BitcodeBytes)366   explicit BitstreamCursor(StringRef BitcodeBytes)
367       : SimpleBitstreamCursor(BitcodeBytes) {}
BitstreamCursor(MemoryBufferRef BitcodeBytes)368   explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
369       : SimpleBitstreamCursor(BitcodeBytes) {}
370 
371   using SimpleBitstreamCursor::AtEndOfStream;
372   using SimpleBitstreamCursor::canSkipToPos;
373   using SimpleBitstreamCursor::fillCurWord;
374   using SimpleBitstreamCursor::getBitcodeBytes;
375   using SimpleBitstreamCursor::GetCurrentBitNo;
376   using SimpleBitstreamCursor::getCurrentByteNo;
377   using SimpleBitstreamCursor::getPointerToByte;
378   using SimpleBitstreamCursor::JumpToBit;
379   using SimpleBitstreamCursor::Read;
380   using SimpleBitstreamCursor::ReadVBR;
381   using SimpleBitstreamCursor::ReadVBR64;
382   using SimpleBitstreamCursor::SizeInBytes;
383   using SimpleBitstreamCursor::skipToEnd;
384 
385   /// Return the number of bits used to encode an abbrev #.
getAbbrevIDWidth()386   unsigned getAbbrevIDWidth() const { return CurCodeSize; }
387 
388   /// Flags that modify the behavior of advance().
389   enum {
390     /// If this flag is used, the advance() method does not automatically pop
391     /// the block scope when the end of a block is reached.
392     AF_DontPopBlockAtEnd = 1,
393 
394     /// If this flag is used, abbrev entries are returned just like normal
395     /// records.
396     AF_DontAutoprocessAbbrevs = 2
397   };
398 
399   /// Advance the current bitstream, returning the next entry in the stream.
400   Expected<BitstreamEntry> advance(unsigned Flags = 0) {
401     while (true) {
402       if (AtEndOfStream())
403         return BitstreamEntry::getError();
404 
405       Expected<unsigned> MaybeCode = ReadCode();
406       if (!MaybeCode)
407         return MaybeCode.takeError();
408       unsigned Code = MaybeCode.get();
409 
410       if (Code == bitc::END_BLOCK) {
411         // Pop the end of the block unless Flags tells us not to.
412         if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
413           return BitstreamEntry::getError();
414         return BitstreamEntry::getEndBlock();
415       }
416 
417       if (Code == bitc::ENTER_SUBBLOCK) {
418         if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID())
419           return BitstreamEntry::getSubBlock(MaybeSubBlock.get());
420         else
421           return MaybeSubBlock.takeError();
422       }
423 
424       if (Code == bitc::DEFINE_ABBREV &&
425           !(Flags & AF_DontAutoprocessAbbrevs)) {
426         // We read and accumulate abbrev's, the client can't do anything with
427         // them anyway.
428         if (Error Err = ReadAbbrevRecord())
429           return std::move(Err);
430         continue;
431       }
432 
433       return BitstreamEntry::getRecord(Code);
434     }
435   }
436 
437   /// This is a convenience function for clients that don't expect any
438   /// subblocks. This just skips over them automatically.
439   Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) {
440     while (true) {
441       // If we found a normal entry, return it.
442       Expected<BitstreamEntry> MaybeEntry = advance(Flags);
443       if (!MaybeEntry)
444         return MaybeEntry;
445       BitstreamEntry Entry = MaybeEntry.get();
446 
447       if (Entry.Kind != BitstreamEntry::SubBlock)
448         return Entry;
449 
450       // If we found a sub-block, just skip over it and check the next entry.
451       if (Error Err = SkipBlock())
452         return std::move(Err);
453     }
454   }
455 
ReadCode()456   Expected<unsigned> ReadCode() { return Read(CurCodeSize); }
457 
458   // Block header:
459   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
460 
461   /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
ReadSubBlockID()462   Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); }
463 
464   /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
465   /// of this block.
SkipBlock()466   Error SkipBlock() {
467     // Read and ignore the codelen value.
468     if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth))
469       ; // Since we are skipping this block, we don't care what code widths are
470         // used inside of it.
471     else
472       return Res.takeError();
473 
474     SkipToFourByteBoundary();
475     Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth);
476     if (!MaybeNum)
477       return MaybeNum.takeError();
478     size_t NumFourBytes = MaybeNum.get();
479 
480     // Check that the block wasn't partially defined, and that the offset isn't
481     // bogus.
482     size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8;
483     if (AtEndOfStream())
484       return createStringError(std::errc::illegal_byte_sequence,
485                                "can't skip block: already at end of stream");
486     if (!canSkipToPos(SkipTo / 8))
487       return createStringError(std::errc::illegal_byte_sequence,
488                                "can't skip to bit %zu from %" PRIu64, SkipTo,
489                                GetCurrentBitNo());
490 
491     if (Error Res = JumpToBit(SkipTo))
492       return Res;
493 
494     return Error::success();
495   }
496 
497   /// Having read the ENTER_SUBBLOCK abbrevid, and enter the block.
498   Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
499 
ReadBlockEnd()500   bool ReadBlockEnd() {
501     if (BlockScope.empty()) return true;
502 
503     // Block tail:
504     //    [END_BLOCK, <align4bytes>]
505     SkipToFourByteBoundary();
506 
507     popBlockScope();
508     return false;
509   }
510 
511 private:
popBlockScope()512   void popBlockScope() {
513     CurCodeSize = BlockScope.back().PrevCodeSize;
514 
515     CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
516     BlockScope.pop_back();
517   }
518 
519   //===--------------------------------------------------------------------===//
520   // Record Processing
521   //===--------------------------------------------------------------------===//
522 
523 public:
524   /// Return the abbreviation for the specified AbbrevId.
getAbbrev(unsigned AbbrevID)525   const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
526     unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
527     if (AbbrevNo >= CurAbbrevs.size())
528       report_fatal_error("Invalid abbrev number");
529     return CurAbbrevs[AbbrevNo].get();
530   }
531 
532   /// Read the current record and discard it, returning the code for the record.
533   Expected<unsigned> skipRecord(unsigned AbbrevID);
534 
535   Expected<unsigned> readRecord(unsigned AbbrevID,
536                                 SmallVectorImpl<uint64_t> &Vals,
537                                 StringRef *Blob = nullptr);
538 
539   //===--------------------------------------------------------------------===//
540   // Abbrev Processing
541   //===--------------------------------------------------------------------===//
542   Error ReadAbbrevRecord();
543 
544   /// Read and return a block info block from the bitstream. If an error was
545   /// encountered, return None.
546   ///
547   /// \param ReadBlockInfoNames Whether to read block/record name information in
548   /// the BlockInfo block. Only llvm-bcanalyzer uses this.
549   Expected<Optional<BitstreamBlockInfo>>
550   ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
551 
552   /// Set the block info to be used by this BitstreamCursor to interpret
553   /// abbreviated records.
setBlockInfo(BitstreamBlockInfo * BI)554   void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
555 };
556 
557 } // end llvm namespace
558 
559 #endif // LLVM_BITSTREAM_BITSTREAMREADER_H
560