1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <string>
13 
14 using namespace llvm;
15 
16 //===----------------------------------------------------------------------===//
17 //  BitstreamCursor implementation
18 //===----------------------------------------------------------------------===//
19 
20 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
EnterSubBlock(unsigned BlockID,unsigned * NumWordsP)21 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
22   // Save the current block's state on BlockScope.
23   BlockScope.push_back(Block(CurCodeSize));
24   BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
25 
26   // Add the abbrevs specific to this block to the CurAbbrevs list.
27   if (BlockInfo) {
28     if (const BitstreamBlockInfo::BlockInfo *Info =
29             BlockInfo->getBlockInfo(BlockID)) {
30       llvm::append_range(CurAbbrevs, Info->Abbrevs);
31     }
32   }
33 
34   // Get the codesize of this block.
35   Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
36   if (!MaybeVBR)
37     return MaybeVBR.takeError();
38   CurCodeSize = MaybeVBR.get();
39 
40   if (CurCodeSize > MaxChunkSize)
41     return llvm::createStringError(
42         std::errc::illegal_byte_sequence,
43         "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
44         CurCodeSize);
45 
46   SkipToFourByteBoundary();
47   Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
48   if (!MaybeNum)
49     return MaybeNum.takeError();
50   word_t NumWords = MaybeNum.get();
51   if (NumWordsP)
52     *NumWordsP = NumWords;
53 
54   if (CurCodeSize == 0)
55     return llvm::createStringError(
56         std::errc::illegal_byte_sequence,
57         "can't enter sub-block: current code size is 0");
58   if (AtEndOfStream())
59     return llvm::createStringError(
60         std::errc::illegal_byte_sequence,
61         "can't enter sub block: already at end of stream");
62 
63   return Error::success();
64 }
65 
readAbbreviatedField(BitstreamCursor & Cursor,const BitCodeAbbrevOp & Op)66 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
67                                                const BitCodeAbbrevOp &Op) {
68   assert(!Op.isLiteral() && "Not to be used with literals!");
69 
70   // Decode the value as we are commanded.
71   switch (Op.getEncoding()) {
72   case BitCodeAbbrevOp::Array:
73   case BitCodeAbbrevOp::Blob:
74     llvm_unreachable("Should not reach here");
75   case BitCodeAbbrevOp::Fixed:
76     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
77     return Cursor.Read((unsigned)Op.getEncodingData());
78   case BitCodeAbbrevOp::VBR:
79     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
80     return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
81   case BitCodeAbbrevOp::Char6:
82     if (Expected<unsigned> Res = Cursor.Read(6))
83       return BitCodeAbbrevOp::DecodeChar6(Res.get());
84     else
85       return Res.takeError();
86   }
87   llvm_unreachable("invalid abbreviation encoding");
88 }
89 
90 /// skipRecord - Read the current record and discard it.
skipRecord(unsigned AbbrevID)91 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
92   // Skip unabbreviated records by reading past their entries.
93   if (AbbrevID == bitc::UNABBREV_RECORD) {
94     Expected<uint32_t> MaybeCode = ReadVBR(6);
95     if (!MaybeCode)
96       return MaybeCode.takeError();
97     unsigned Code = MaybeCode.get();
98     Expected<uint32_t> MaybeVBR = ReadVBR(6);
99     if (!MaybeVBR)
100       return MaybeVBR.get();
101     unsigned NumElts = MaybeVBR.get();
102     for (unsigned i = 0; i != NumElts; ++i)
103       if (Expected<uint64_t> Res = ReadVBR64(6))
104         ; // Skip!
105       else
106         return Res.takeError();
107     return Code;
108   }
109 
110   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
111   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
112   unsigned Code;
113   if (CodeOp.isLiteral())
114     Code = CodeOp.getLiteralValue();
115   else {
116     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
117         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
118       return llvm::createStringError(
119           std::errc::illegal_byte_sequence,
120           "Abbreviation starts with an Array or a Blob");
121     Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
122     if (!MaybeCode)
123       return MaybeCode.takeError();
124     Code = MaybeCode.get();
125   }
126 
127   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
128     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
129     if (Op.isLiteral())
130       continue;
131 
132     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
133         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
134       if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
135         continue;
136       else
137         return MaybeField.takeError();
138     }
139 
140     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
141       // Array case.  Read the number of elements as a vbr6.
142       Expected<uint32_t> MaybeNum = ReadVBR(6);
143       if (!MaybeNum)
144         return MaybeNum.takeError();
145       unsigned NumElts = MaybeNum.get();
146 
147       // Get the element encoding.
148       assert(i+2 == e && "array op not second to last?");
149       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
150 
151       // Read all the elements.
152       // Decode the value as we are commanded.
153       switch (EltEnc.getEncoding()) {
154       default:
155         report_fatal_error("Array element type can't be an Array or a Blob");
156       case BitCodeAbbrevOp::Fixed:
157         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
158         if (Error Err =
159                 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
160                                                   EltEnc.getEncodingData()))
161           return std::move(Err);
162         break;
163       case BitCodeAbbrevOp::VBR:
164         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
165         for (; NumElts; --NumElts)
166           if (Expected<uint64_t> Res =
167                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
168             ; // Skip!
169           else
170             return Res.takeError();
171         break;
172       case BitCodeAbbrevOp::Char6:
173         if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
174           return std::move(Err);
175         break;
176       }
177       continue;
178     }
179 
180     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
181     // Blob case.  Read the number of bytes as a vbr6.
182     Expected<uint32_t> MaybeNum = ReadVBR(6);
183     if (!MaybeNum)
184       return MaybeNum.takeError();
185     unsigned NumElts = MaybeNum.get();
186     SkipToFourByteBoundary();  // 32-bit alignment
187 
188     // Figure out where the end of this blob will be including tail padding.
189     const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
190 
191     // If this would read off the end of the bitcode file, just set the
192     // record to empty and return.
193     if (!canSkipToPos(NewEnd/8)) {
194       skipToEnd();
195       break;
196     }
197 
198     // Skip over the blob.
199     if (Error Err = JumpToBit(NewEnd))
200       return std::move(Err);
201   }
202   return Code;
203 }
204 
readRecord(unsigned AbbrevID,SmallVectorImpl<uint64_t> & Vals,StringRef * Blob)205 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
206                                                SmallVectorImpl<uint64_t> &Vals,
207                                                StringRef *Blob) {
208   if (AbbrevID == bitc::UNABBREV_RECORD) {
209     Expected<uint32_t> MaybeCode = ReadVBR(6);
210     if (!MaybeCode)
211       return MaybeCode.takeError();
212     uint32_t Code = MaybeCode.get();
213     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
214     if (!MaybeNumElts)
215       return MaybeNumElts.takeError();
216     uint32_t NumElts = MaybeNumElts.get();
217     Vals.reserve(Vals.size() + NumElts);
218 
219     for (unsigned i = 0; i != NumElts; ++i)
220       if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
221         Vals.push_back(MaybeVal.get());
222       else
223         return MaybeVal.takeError();
224     return Code;
225   }
226 
227   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
228 
229   // Read the record code first.
230   assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
231   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
232   unsigned Code;
233   if (CodeOp.isLiteral())
234     Code = CodeOp.getLiteralValue();
235   else {
236     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
237         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
238       report_fatal_error("Abbreviation starts with an Array or a Blob");
239     if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
240       Code = MaybeCode.get();
241     else
242       return MaybeCode.takeError();
243   }
244 
245   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
246     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
247     if (Op.isLiteral()) {
248       Vals.push_back(Op.getLiteralValue());
249       continue;
250     }
251 
252     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
253         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
254       if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
255         Vals.push_back(MaybeVal.get());
256       else
257         return MaybeVal.takeError();
258       continue;
259     }
260 
261     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
262       // Array case.  Read the number of elements as a vbr6.
263       Expected<uint32_t> MaybeNumElts = ReadVBR(6);
264       if (!MaybeNumElts)
265         return MaybeNumElts.takeError();
266       uint32_t NumElts = MaybeNumElts.get();
267       Vals.reserve(Vals.size() + NumElts);
268 
269       // Get the element encoding.
270       if (i + 2 != e)
271         report_fatal_error("Array op not second to last");
272       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
273       if (!EltEnc.isEncoding())
274         report_fatal_error(
275             "Array element type has to be an encoding of a type");
276 
277       // Read all the elements.
278       switch (EltEnc.getEncoding()) {
279       default:
280         report_fatal_error("Array element type can't be an Array or a Blob");
281       case BitCodeAbbrevOp::Fixed:
282         for (; NumElts; --NumElts)
283           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
284                   Read((unsigned)EltEnc.getEncodingData()))
285             Vals.push_back(MaybeVal.get());
286           else
287             return MaybeVal.takeError();
288         break;
289       case BitCodeAbbrevOp::VBR:
290         for (; NumElts; --NumElts)
291           if (Expected<uint64_t> MaybeVal =
292                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
293             Vals.push_back(MaybeVal.get());
294           else
295             return MaybeVal.takeError();
296         break;
297       case BitCodeAbbrevOp::Char6:
298         for (; NumElts; --NumElts)
299           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
300             Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
301           else
302             return MaybeVal.takeError();
303       }
304       continue;
305     }
306 
307     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
308     // Blob case.  Read the number of bytes as a vbr6.
309     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
310     if (!MaybeNumElts)
311       return MaybeNumElts.takeError();
312     uint32_t NumElts = MaybeNumElts.get();
313     SkipToFourByteBoundary();  // 32-bit alignment
314 
315     // Figure out where the end of this blob will be including tail padding.
316     size_t CurBitPos = GetCurrentBitNo();
317     const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
318 
319     // If this would read off the end of the bitcode file, just set the
320     // record to empty and return.
321     if (!canSkipToPos(NewEnd/8)) {
322       Vals.append(NumElts, 0);
323       skipToEnd();
324       break;
325     }
326 
327     // Otherwise, inform the streamer that we need these bytes in memory.  Skip
328     // over tail padding first, in case jumping to NewEnd invalidates the Blob
329     // pointer.
330     if (Error Err = JumpToBit(NewEnd))
331       return std::move(Err);
332     const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
333 
334     // If we can return a reference to the data, do so to avoid copying it.
335     if (Blob) {
336       *Blob = StringRef(Ptr, NumElts);
337     } else {
338       // Otherwise, unpack into Vals with zero extension.
339       auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
340       Vals.append(UPtr, UPtr + NumElts);
341     }
342   }
343 
344   return Code;
345 }
346 
ReadAbbrevRecord()347 Error BitstreamCursor::ReadAbbrevRecord() {
348   auto Abbv = std::make_shared<BitCodeAbbrev>();
349   Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
350   if (!MaybeNumOpInfo)
351     return MaybeNumOpInfo.takeError();
352   unsigned NumOpInfo = MaybeNumOpInfo.get();
353   for (unsigned i = 0; i != NumOpInfo; ++i) {
354     Expected<word_t> MaybeIsLiteral = Read(1);
355     if (!MaybeIsLiteral)
356       return MaybeIsLiteral.takeError();
357     bool IsLiteral = MaybeIsLiteral.get();
358     if (IsLiteral) {
359       Expected<uint64_t> MaybeOp = ReadVBR64(8);
360       if (!MaybeOp)
361         return MaybeOp.takeError();
362       Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
363       continue;
364     }
365 
366     Expected<word_t> MaybeEncoding = Read(3);
367     if (!MaybeEncoding)
368       return MaybeEncoding.takeError();
369     BitCodeAbbrevOp::Encoding E =
370         (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
371     if (BitCodeAbbrevOp::hasEncodingData(E)) {
372       Expected<uint64_t> MaybeData = ReadVBR64(5);
373       if (!MaybeData)
374         return MaybeData.takeError();
375       uint64_t Data = MaybeData.get();
376 
377       // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
378       // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
379       // a slow path in Read() to have to handle reading zero bits.
380       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
381           Data == 0) {
382         Abbv->Add(BitCodeAbbrevOp(0));
383         continue;
384       }
385 
386       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
387           Data > MaxChunkSize)
388         report_fatal_error(
389             "Fixed or VBR abbrev record with size > MaxChunkData");
390 
391       Abbv->Add(BitCodeAbbrevOp(E, Data));
392     } else
393       Abbv->Add(BitCodeAbbrevOp(E));
394   }
395 
396   if (Abbv->getNumOperandInfos() == 0)
397     report_fatal_error("Abbrev record with no operands");
398   CurAbbrevs.push_back(std::move(Abbv));
399 
400   return Error::success();
401 }
402 
403 Expected<Optional<BitstreamBlockInfo>>
ReadBlockInfoBlock(bool ReadBlockInfoNames)404 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
405   if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
406     return std::move(Err);
407 
408   BitstreamBlockInfo NewBlockInfo;
409 
410   SmallVector<uint64_t, 64> Record;
411   BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
412 
413   // Read all the records for this module.
414   while (true) {
415     Expected<BitstreamEntry> MaybeEntry =
416         advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
417     if (!MaybeEntry)
418       return MaybeEntry.takeError();
419     BitstreamEntry Entry = MaybeEntry.get();
420 
421     switch (Entry.Kind) {
422     case llvm::BitstreamEntry::SubBlock: // Handled for us already.
423     case llvm::BitstreamEntry::Error:
424       return None;
425     case llvm::BitstreamEntry::EndBlock:
426       return std::move(NewBlockInfo);
427     case llvm::BitstreamEntry::Record:
428       // The interesting case.
429       break;
430     }
431 
432     // Read abbrev records, associate them with CurBID.
433     if (Entry.ID == bitc::DEFINE_ABBREV) {
434       if (!CurBlockInfo) return None;
435       if (Error Err = ReadAbbrevRecord())
436         return std::move(Err);
437 
438       // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
439       // appropriate BlockInfo.
440       CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
441       CurAbbrevs.pop_back();
442       continue;
443     }
444 
445     // Read a record.
446     Record.clear();
447     Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
448     if (!MaybeBlockInfo)
449       return MaybeBlockInfo.takeError();
450     switch (MaybeBlockInfo.get()) {
451     default:
452       break; // Default behavior, ignore unknown content.
453     case bitc::BLOCKINFO_CODE_SETBID:
454       if (Record.size() < 1)
455         return None;
456       CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
457       break;
458     case bitc::BLOCKINFO_CODE_BLOCKNAME: {
459       if (!CurBlockInfo)
460         return None;
461       if (!ReadBlockInfoNames)
462         break; // Ignore name.
463       CurBlockInfo->Name = std::string(Record.begin(), Record.end());
464       break;
465     }
466       case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
467         if (!CurBlockInfo) return None;
468         if (!ReadBlockInfoNames)
469           break; // Ignore name.
470         CurBlockInfo->RecordNames.emplace_back(
471             (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
472         break;
473       }
474       }
475   }
476 }
477