1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <optional>
13 #include <string>
14 
15 using namespace llvm;
16 
17 //===----------------------------------------------------------------------===//
18 //  BitstreamCursor implementation
19 //===----------------------------------------------------------------------===//
20 //
21 static Error error(const char *Message) {
22   return createStringError(std::errc::illegal_byte_sequence, Message);
23 }
24 
25 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
26 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
27   // Save the current block's state on BlockScope.
28   BlockScope.push_back(Block(CurCodeSize));
29   BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
30 
31   // Add the abbrevs specific to this block to the CurAbbrevs list.
32   if (BlockInfo) {
33     if (const BitstreamBlockInfo::BlockInfo *Info =
34             BlockInfo->getBlockInfo(BlockID)) {
35       llvm::append_range(CurAbbrevs, Info->Abbrevs);
36     }
37   }
38 
39   // Get the codesize of this block.
40   Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
41   if (!MaybeVBR)
42     return MaybeVBR.takeError();
43   CurCodeSize = MaybeVBR.get();
44 
45   if (CurCodeSize > MaxChunkSize)
46     return llvm::createStringError(
47         std::errc::illegal_byte_sequence,
48         "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
49         CurCodeSize);
50 
51   SkipToFourByteBoundary();
52   Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
53   if (!MaybeNum)
54     return MaybeNum.takeError();
55   word_t NumWords = MaybeNum.get();
56   if (NumWordsP)
57     *NumWordsP = NumWords;
58 
59   if (CurCodeSize == 0)
60     return llvm::createStringError(
61         std::errc::illegal_byte_sequence,
62         "can't enter sub-block: current code size is 0");
63   if (AtEndOfStream())
64     return llvm::createStringError(
65         std::errc::illegal_byte_sequence,
66         "can't enter sub block: already at end of stream");
67 
68   return Error::success();
69 }
70 
71 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
72                                                const BitCodeAbbrevOp &Op) {
73   assert(!Op.isLiteral() && "Not to be used with literals!");
74 
75   // Decode the value as we are commanded.
76   switch (Op.getEncoding()) {
77   case BitCodeAbbrevOp::Array:
78   case BitCodeAbbrevOp::Blob:
79     llvm_unreachable("Should not reach here");
80   case BitCodeAbbrevOp::Fixed:
81     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
82     return Cursor.Read((unsigned)Op.getEncodingData());
83   case BitCodeAbbrevOp::VBR:
84     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
85     return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
86   case BitCodeAbbrevOp::Char6:
87     if (Expected<unsigned> Res = Cursor.Read(6))
88       return BitCodeAbbrevOp::DecodeChar6(Res.get());
89     else
90       return Res.takeError();
91   }
92   llvm_unreachable("invalid abbreviation encoding");
93 }
94 
95 /// skipRecord - Read the current record and discard it.
96 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
97   // Skip unabbreviated records by reading past their entries.
98   if (AbbrevID == bitc::UNABBREV_RECORD) {
99     Expected<uint32_t> MaybeCode = ReadVBR(6);
100     if (!MaybeCode)
101       return MaybeCode.takeError();
102     unsigned Code = MaybeCode.get();
103     Expected<uint32_t> MaybeVBR = ReadVBR(6);
104     if (!MaybeVBR)
105       return MaybeVBR.takeError();
106     unsigned NumElts = MaybeVBR.get();
107     for (unsigned i = 0; i != NumElts; ++i)
108       if (Expected<uint64_t> Res = ReadVBR64(6))
109         ; // Skip!
110       else
111         return Res.takeError();
112     return Code;
113   }
114 
115   Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
116   if (!MaybeAbbv)
117     return MaybeAbbv.takeError();
118 
119   const BitCodeAbbrev *Abbv = MaybeAbbv.get();
120   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
121   unsigned Code;
122   if (CodeOp.isLiteral())
123     Code = CodeOp.getLiteralValue();
124   else {
125     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
126         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
127       return llvm::createStringError(
128           std::errc::illegal_byte_sequence,
129           "Abbreviation starts with an Array or a Blob");
130     Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
131     if (!MaybeCode)
132       return MaybeCode.takeError();
133     Code = MaybeCode.get();
134   }
135 
136   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
137     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
138     if (Op.isLiteral())
139       continue;
140 
141     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
142         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
143       if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
144         continue;
145       else
146         return MaybeField.takeError();
147     }
148 
149     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
150       // Array case.  Read the number of elements as a vbr6.
151       Expected<uint32_t> MaybeNum = ReadVBR(6);
152       if (!MaybeNum)
153         return MaybeNum.takeError();
154       unsigned NumElts = MaybeNum.get();
155 
156       // Get the element encoding.
157       assert(i+2 == e && "array op not second to last?");
158       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
159 
160       // Read all the elements.
161       // Decode the value as we are commanded.
162       switch (EltEnc.getEncoding()) {
163       default:
164         return error("Array element type can't be an Array or a Blob");
165       case BitCodeAbbrevOp::Fixed:
166         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
167         if (Error Err =
168                 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
169                                                   EltEnc.getEncodingData()))
170           return std::move(Err);
171         break;
172       case BitCodeAbbrevOp::VBR:
173         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
174         for (; NumElts; --NumElts)
175           if (Expected<uint64_t> Res =
176                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
177             ; // Skip!
178           else
179             return Res.takeError();
180         break;
181       case BitCodeAbbrevOp::Char6:
182         if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
183           return std::move(Err);
184         break;
185       }
186       continue;
187     }
188 
189     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
190     // Blob case.  Read the number of bytes as a vbr6.
191     Expected<uint32_t> MaybeNum = ReadVBR(6);
192     if (!MaybeNum)
193       return MaybeNum.takeError();
194     unsigned NumElts = MaybeNum.get();
195     SkipToFourByteBoundary();  // 32-bit alignment
196 
197     // Figure out where the end of this blob will be including tail padding.
198     const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
199 
200     // If this would read off the end of the bitcode file, just set the
201     // record to empty and return.
202     if (!canSkipToPos(NewEnd/8)) {
203       skipToEnd();
204       break;
205     }
206 
207     // Skip over the blob.
208     if (Error Err = JumpToBit(NewEnd))
209       return std::move(Err);
210   }
211   return Code;
212 }
213 
214 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
215                                                SmallVectorImpl<uint64_t> &Vals,
216                                                StringRef *Blob) {
217   if (AbbrevID == bitc::UNABBREV_RECORD) {
218     Expected<uint32_t> MaybeCode = ReadVBR(6);
219     if (!MaybeCode)
220       return MaybeCode.takeError();
221     uint32_t Code = MaybeCode.get();
222     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
223     if (!MaybeNumElts)
224       return error(
225           ("Failed to read size: " + toString(MaybeNumElts.takeError()))
226               .c_str());
227     uint32_t NumElts = MaybeNumElts.get();
228     if (!isSizePlausible(NumElts))
229       return error("Size is not plausible");
230     Vals.reserve(Vals.size() + NumElts);
231 
232     for (unsigned i = 0; i != NumElts; ++i)
233       if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
234         Vals.push_back(MaybeVal.get());
235       else
236         return MaybeVal.takeError();
237     return Code;
238   }
239 
240   Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
241   if (!MaybeAbbv)
242     return MaybeAbbv.takeError();
243   const BitCodeAbbrev *Abbv = MaybeAbbv.get();
244 
245   // Read the record code first.
246   assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
247   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
248   unsigned Code;
249   if (CodeOp.isLiteral())
250     Code = CodeOp.getLiteralValue();
251   else {
252     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
253         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
254       return error("Abbreviation starts with an Array or a Blob");
255     if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
256       Code = MaybeCode.get();
257     else
258       return MaybeCode.takeError();
259   }
260 
261   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
262     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
263     if (Op.isLiteral()) {
264       Vals.push_back(Op.getLiteralValue());
265       continue;
266     }
267 
268     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
269         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
270       if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
271         Vals.push_back(MaybeVal.get());
272       else
273         return MaybeVal.takeError();
274       continue;
275     }
276 
277     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
278       // Array case.  Read the number of elements as a vbr6.
279       Expected<uint32_t> MaybeNumElts = ReadVBR(6);
280       if (!MaybeNumElts)
281         return error(
282             ("Failed to read size: " + toString(MaybeNumElts.takeError()))
283                 .c_str());
284       uint32_t NumElts = MaybeNumElts.get();
285       if (!isSizePlausible(NumElts))
286         return error("Size is not plausible");
287       Vals.reserve(Vals.size() + NumElts);
288 
289       // Get the element encoding.
290       if (i + 2 != e)
291         return error("Array op not second to last");
292       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
293       if (!EltEnc.isEncoding())
294         return error(
295             "Array element type has to be an encoding of a type");
296 
297       // Read all the elements.
298       switch (EltEnc.getEncoding()) {
299       default:
300         return error("Array element type can't be an Array or a Blob");
301       case BitCodeAbbrevOp::Fixed:
302         for (; NumElts; --NumElts)
303           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
304                   Read((unsigned)EltEnc.getEncodingData()))
305             Vals.push_back(MaybeVal.get());
306           else
307             return MaybeVal.takeError();
308         break;
309       case BitCodeAbbrevOp::VBR:
310         for (; NumElts; --NumElts)
311           if (Expected<uint64_t> MaybeVal =
312                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
313             Vals.push_back(MaybeVal.get());
314           else
315             return MaybeVal.takeError();
316         break;
317       case BitCodeAbbrevOp::Char6:
318         for (; NumElts; --NumElts)
319           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
320             Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
321           else
322             return MaybeVal.takeError();
323       }
324       continue;
325     }
326 
327     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
328     // Blob case.  Read the number of bytes as a vbr6.
329     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
330     if (!MaybeNumElts)
331       return MaybeNumElts.takeError();
332     uint32_t NumElts = MaybeNumElts.get();
333     SkipToFourByteBoundary();  // 32-bit alignment
334 
335     // Figure out where the end of this blob will be including tail padding.
336     size_t CurBitPos = GetCurrentBitNo();
337     const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
338 
339     // Make sure the bitstream is large enough to contain the blob.
340     if (!canSkipToPos(NewEnd/8))
341       return error("Blob ends too soon");
342 
343     // Otherwise, inform the streamer that we need these bytes in memory.  Skip
344     // over tail padding first, in case jumping to NewEnd invalidates the Blob
345     // pointer.
346     if (Error Err = JumpToBit(NewEnd))
347       return std::move(Err);
348     const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
349 
350     // If we can return a reference to the data, do so to avoid copying it.
351     if (Blob) {
352       *Blob = StringRef(Ptr, NumElts);
353     } else {
354       // Otherwise, unpack into Vals with zero extension.
355       auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
356       Vals.append(UPtr, UPtr + NumElts);
357     }
358   }
359 
360   return Code;
361 }
362 
363 Error BitstreamCursor::ReadAbbrevRecord() {
364   auto Abbv = std::make_shared<BitCodeAbbrev>();
365   Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
366   if (!MaybeNumOpInfo)
367     return MaybeNumOpInfo.takeError();
368   unsigned NumOpInfo = MaybeNumOpInfo.get();
369   for (unsigned i = 0; i != NumOpInfo; ++i) {
370     Expected<word_t> MaybeIsLiteral = Read(1);
371     if (!MaybeIsLiteral)
372       return MaybeIsLiteral.takeError();
373     bool IsLiteral = MaybeIsLiteral.get();
374     if (IsLiteral) {
375       Expected<uint64_t> MaybeOp = ReadVBR64(8);
376       if (!MaybeOp)
377         return MaybeOp.takeError();
378       Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
379       continue;
380     }
381 
382     Expected<word_t> MaybeEncoding = Read(3);
383     if (!MaybeEncoding)
384       return MaybeEncoding.takeError();
385     if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get()))
386       return error("Invalid encoding");
387 
388     BitCodeAbbrevOp::Encoding E =
389         (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
390     if (BitCodeAbbrevOp::hasEncodingData(E)) {
391       Expected<uint64_t> MaybeData = ReadVBR64(5);
392       if (!MaybeData)
393         return MaybeData.takeError();
394       uint64_t Data = MaybeData.get();
395 
396       // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
397       // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
398       // a slow path in Read() to have to handle reading zero bits.
399       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
400           Data == 0) {
401         Abbv->Add(BitCodeAbbrevOp(0));
402         continue;
403       }
404 
405       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
406           Data > MaxChunkSize)
407         return error("Fixed or VBR abbrev record with size > MaxChunkData");
408 
409       Abbv->Add(BitCodeAbbrevOp(E, Data));
410     } else
411       Abbv->Add(BitCodeAbbrevOp(E));
412   }
413 
414   if (Abbv->getNumOperandInfos() == 0)
415     return error("Abbrev record with no operands");
416   CurAbbrevs.push_back(std::move(Abbv));
417 
418   return Error::success();
419 }
420 
421 Expected<std::optional<BitstreamBlockInfo>>
422 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
423   if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
424     return std::move(Err);
425 
426   BitstreamBlockInfo NewBlockInfo;
427 
428   SmallVector<uint64_t, 64> Record;
429   BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
430 
431   // Read all the records for this module.
432   while (true) {
433     Expected<BitstreamEntry> MaybeEntry =
434         advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
435     if (!MaybeEntry)
436       return MaybeEntry.takeError();
437     BitstreamEntry Entry = MaybeEntry.get();
438 
439     switch (Entry.Kind) {
440     case llvm::BitstreamEntry::SubBlock: // Handled for us already.
441     case llvm::BitstreamEntry::Error:
442       return std::nullopt;
443     case llvm::BitstreamEntry::EndBlock:
444       return std::move(NewBlockInfo);
445     case llvm::BitstreamEntry::Record:
446       // The interesting case.
447       break;
448     }
449 
450     // Read abbrev records, associate them with CurBID.
451     if (Entry.ID == bitc::DEFINE_ABBREV) {
452       if (!CurBlockInfo)
453         return std::nullopt;
454       if (Error Err = ReadAbbrevRecord())
455         return std::move(Err);
456 
457       // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
458       // appropriate BlockInfo.
459       CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
460       CurAbbrevs.pop_back();
461       continue;
462     }
463 
464     // Read a record.
465     Record.clear();
466     Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
467     if (!MaybeBlockInfo)
468       return MaybeBlockInfo.takeError();
469     switch (MaybeBlockInfo.get()) {
470     default:
471       break; // Default behavior, ignore unknown content.
472     case bitc::BLOCKINFO_CODE_SETBID:
473       if (Record.size() < 1)
474         return std::nullopt;
475       CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
476       break;
477     case bitc::BLOCKINFO_CODE_BLOCKNAME: {
478       if (!CurBlockInfo)
479         return std::nullopt;
480       if (!ReadBlockInfoNames)
481         break; // Ignore name.
482       CurBlockInfo->Name = std::string(Record.begin(), Record.end());
483       break;
484     }
485       case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
486       if (!CurBlockInfo)
487         return std::nullopt;
488       if (!ReadBlockInfoNames)
489         break; // Ignore name.
490       CurBlockInfo->RecordNames.emplace_back(
491           (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
492       break;
493       }
494       }
495   }
496 }
497