1 //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Bitcode/BitcodeAnalyzer.h"
10 #include "llvm/Bitcode/BitcodeReader.h"
11 #include "llvm/Bitcode/LLVMBitCodes.h"
12 #include "llvm/Bitstream/BitCodes.h"
13 #include "llvm/Bitstream/BitstreamReader.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/SHA1.h"
16 
17 using namespace llvm;
18 
19 static Error reportError(StringRef Message) {
20   return createStringError(std::errc::illegal_byte_sequence, Message.data());
21 }
22 
23 /// Return a symbolic block name if known, otherwise return null.
24 static Optional<const char *> GetBlockName(unsigned BlockID,
25                                            const BitstreamBlockInfo &BlockInfo,
26                                            CurStreamTypeType CurStreamType) {
27   // Standard blocks for all bitcode files.
28   if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
29     if (BlockID == bitc::BLOCKINFO_BLOCK_ID)
30       return "BLOCKINFO_BLOCK";
31     return None;
32   }
33 
34   // Check to see if we have a blockinfo record for this block, with a name.
35   if (const BitstreamBlockInfo::BlockInfo *Info =
36           BlockInfo.getBlockInfo(BlockID)) {
37     if (!Info->Name.empty())
38       return Info->Name.c_str();
39   }
40 
41   if (CurStreamType != LLVMIRBitstream)
42     return None;
43 
44   switch (BlockID) {
45   default:
46     return None;
47   case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
48     return "OPERAND_BUNDLE_TAGS_BLOCK";
49   case bitc::MODULE_BLOCK_ID:
50     return "MODULE_BLOCK";
51   case bitc::PARAMATTR_BLOCK_ID:
52     return "PARAMATTR_BLOCK";
53   case bitc::PARAMATTR_GROUP_BLOCK_ID:
54     return "PARAMATTR_GROUP_BLOCK_ID";
55   case bitc::TYPE_BLOCK_ID_NEW:
56     return "TYPE_BLOCK_ID";
57   case bitc::CONSTANTS_BLOCK_ID:
58     return "CONSTANTS_BLOCK";
59   case bitc::FUNCTION_BLOCK_ID:
60     return "FUNCTION_BLOCK";
61   case bitc::IDENTIFICATION_BLOCK_ID:
62     return "IDENTIFICATION_BLOCK_ID";
63   case bitc::VALUE_SYMTAB_BLOCK_ID:
64     return "VALUE_SYMTAB";
65   case bitc::METADATA_BLOCK_ID:
66     return "METADATA_BLOCK";
67   case bitc::METADATA_KIND_BLOCK_ID:
68     return "METADATA_KIND_BLOCK";
69   case bitc::METADATA_ATTACHMENT_ID:
70     return "METADATA_ATTACHMENT_BLOCK";
71   case bitc::USELIST_BLOCK_ID:
72     return "USELIST_BLOCK_ID";
73   case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
74     return "GLOBALVAL_SUMMARY_BLOCK";
75   case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
76     return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
77   case bitc::MODULE_STRTAB_BLOCK_ID:
78     return "MODULE_STRTAB_BLOCK";
79   case bitc::STRTAB_BLOCK_ID:
80     return "STRTAB_BLOCK";
81   case bitc::SYMTAB_BLOCK_ID:
82     return "SYMTAB_BLOCK";
83   }
84 }
85 
86 /// Return a symbolic code name if known, otherwise return null.
87 static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
88                                           const BitstreamBlockInfo &BlockInfo,
89                                           CurStreamTypeType CurStreamType) {
90   // Standard blocks for all bitcode files.
91   if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
92     if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
93       switch (CodeID) {
94       default:
95         return None;
96       case bitc::BLOCKINFO_CODE_SETBID:
97         return "SETBID";
98       case bitc::BLOCKINFO_CODE_BLOCKNAME:
99         return "BLOCKNAME";
100       case bitc::BLOCKINFO_CODE_SETRECORDNAME:
101         return "SETRECORDNAME";
102       }
103     }
104     return None;
105   }
106 
107   // Check to see if we have a blockinfo record for this record, with a name.
108   if (const BitstreamBlockInfo::BlockInfo *Info =
109           BlockInfo.getBlockInfo(BlockID)) {
110     for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i)
111       if (Info->RecordNames[i].first == CodeID)
112         return Info->RecordNames[i].second.c_str();
113   }
114 
115   if (CurStreamType != LLVMIRBitstream)
116     return None;
117 
118 #define STRINGIFY_CODE(PREFIX, CODE)                                           \
119   case bitc::PREFIX##_##CODE:                                                  \
120     return #CODE;
121   switch (BlockID) {
122   default:
123     return None;
124   case bitc::MODULE_BLOCK_ID:
125     switch (CodeID) {
126     default:
127       return None;
128       STRINGIFY_CODE(MODULE_CODE, VERSION)
129       STRINGIFY_CODE(MODULE_CODE, TRIPLE)
130       STRINGIFY_CODE(MODULE_CODE, DATALAYOUT)
131       STRINGIFY_CODE(MODULE_CODE, ASM)
132       STRINGIFY_CODE(MODULE_CODE, SECTIONNAME)
133       STRINGIFY_CODE(MODULE_CODE, DEPLIB) // Deprecated, present in old bitcode
134       STRINGIFY_CODE(MODULE_CODE, GLOBALVAR)
135       STRINGIFY_CODE(MODULE_CODE, FUNCTION)
136       STRINGIFY_CODE(MODULE_CODE, ALIAS)
137       STRINGIFY_CODE(MODULE_CODE, GCNAME)
138       STRINGIFY_CODE(MODULE_CODE, COMDAT)
139       STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
140       STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
141       STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
142       STRINGIFY_CODE(MODULE_CODE, HASH)
143     }
144   case bitc::IDENTIFICATION_BLOCK_ID:
145     switch (CodeID) {
146     default:
147       return None;
148       STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
149       STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
150     }
151   case bitc::PARAMATTR_BLOCK_ID:
152     switch (CodeID) {
153     default:
154       return None;
155     // FIXME: Should these be different?
156     case bitc::PARAMATTR_CODE_ENTRY_OLD:
157       return "ENTRY";
158     case bitc::PARAMATTR_CODE_ENTRY:
159       return "ENTRY";
160     }
161   case bitc::PARAMATTR_GROUP_BLOCK_ID:
162     switch (CodeID) {
163     default:
164       return None;
165     case bitc::PARAMATTR_GRP_CODE_ENTRY:
166       return "ENTRY";
167     }
168   case bitc::TYPE_BLOCK_ID_NEW:
169     switch (CodeID) {
170     default:
171       return None;
172       STRINGIFY_CODE(TYPE_CODE, NUMENTRY)
173       STRINGIFY_CODE(TYPE_CODE, VOID)
174       STRINGIFY_CODE(TYPE_CODE, FLOAT)
175       STRINGIFY_CODE(TYPE_CODE, DOUBLE)
176       STRINGIFY_CODE(TYPE_CODE, LABEL)
177       STRINGIFY_CODE(TYPE_CODE, OPAQUE)
178       STRINGIFY_CODE(TYPE_CODE, INTEGER)
179       STRINGIFY_CODE(TYPE_CODE, POINTER)
180       STRINGIFY_CODE(TYPE_CODE, HALF)
181       STRINGIFY_CODE(TYPE_CODE, ARRAY)
182       STRINGIFY_CODE(TYPE_CODE, VECTOR)
183       STRINGIFY_CODE(TYPE_CODE, X86_FP80)
184       STRINGIFY_CODE(TYPE_CODE, FP128)
185       STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
186       STRINGIFY_CODE(TYPE_CODE, METADATA)
187       STRINGIFY_CODE(TYPE_CODE, X86_MMX)
188       STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
189       STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
190       STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
191       STRINGIFY_CODE(TYPE_CODE, FUNCTION)
192       STRINGIFY_CODE(TYPE_CODE, TOKEN)
193       STRINGIFY_CODE(TYPE_CODE, BFLOAT)
194     }
195 
196   case bitc::CONSTANTS_BLOCK_ID:
197     switch (CodeID) {
198     default:
199       return None;
200       STRINGIFY_CODE(CST_CODE, SETTYPE)
201       STRINGIFY_CODE(CST_CODE, NULL)
202       STRINGIFY_CODE(CST_CODE, UNDEF)
203       STRINGIFY_CODE(CST_CODE, INTEGER)
204       STRINGIFY_CODE(CST_CODE, WIDE_INTEGER)
205       STRINGIFY_CODE(CST_CODE, FLOAT)
206       STRINGIFY_CODE(CST_CODE, AGGREGATE)
207       STRINGIFY_CODE(CST_CODE, STRING)
208       STRINGIFY_CODE(CST_CODE, CSTRING)
209       STRINGIFY_CODE(CST_CODE, CE_BINOP)
210       STRINGIFY_CODE(CST_CODE, CE_CAST)
211       STRINGIFY_CODE(CST_CODE, CE_GEP)
212       STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP)
213       STRINGIFY_CODE(CST_CODE, CE_SELECT)
214       STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT)
215       STRINGIFY_CODE(CST_CODE, CE_INSERTELT)
216       STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC)
217       STRINGIFY_CODE(CST_CODE, CE_CMP)
218       STRINGIFY_CODE(CST_CODE, INLINEASM)
219       STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
220       STRINGIFY_CODE(CST_CODE, CE_UNOP)
221     case bitc::CST_CODE_BLOCKADDRESS:
222       return "CST_CODE_BLOCKADDRESS";
223       STRINGIFY_CODE(CST_CODE, DATA)
224     }
225   case bitc::FUNCTION_BLOCK_ID:
226     switch (CodeID) {
227     default:
228       return None;
229       STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS)
230       STRINGIFY_CODE(FUNC_CODE, INST_BINOP)
231       STRINGIFY_CODE(FUNC_CODE, INST_CAST)
232       STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD)
233       STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD)
234       STRINGIFY_CODE(FUNC_CODE, INST_SELECT)
235       STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT)
236       STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT)
237       STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC)
238       STRINGIFY_CODE(FUNC_CODE, INST_CMP)
239       STRINGIFY_CODE(FUNC_CODE, INST_RET)
240       STRINGIFY_CODE(FUNC_CODE, INST_BR)
241       STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
242       STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
243       STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
244       STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
245       STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
246       STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
247       STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD)
248       STRINGIFY_CODE(FUNC_CODE, INST_PHI)
249       STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
250       STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
251       STRINGIFY_CODE(FUNC_CODE, INST_VAARG)
252       STRINGIFY_CODE(FUNC_CODE, INST_STORE)
253       STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL)
254       STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL)
255       STRINGIFY_CODE(FUNC_CODE, INST_CMP2)
256       STRINGIFY_CODE(FUNC_CODE, INST_VSELECT)
257       STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN)
258       STRINGIFY_CODE(FUNC_CODE, INST_CALL)
259       STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
260       STRINGIFY_CODE(FUNC_CODE, INST_GEP)
261       STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
262       STRINGIFY_CODE(FUNC_CODE, INST_FENCE)
263       STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW)
264       STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC)
265       STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
266       STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
267       STRINGIFY_CODE(FUNC_CODE, INST_CALLBR)
268     }
269   case bitc::VALUE_SYMTAB_BLOCK_ID:
270     switch (CodeID) {
271     default:
272       return None;
273       STRINGIFY_CODE(VST_CODE, ENTRY)
274       STRINGIFY_CODE(VST_CODE, BBENTRY)
275       STRINGIFY_CODE(VST_CODE, FNENTRY)
276       STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY)
277     }
278   case bitc::MODULE_STRTAB_BLOCK_ID:
279     switch (CodeID) {
280     default:
281       return None;
282       STRINGIFY_CODE(MST_CODE, ENTRY)
283       STRINGIFY_CODE(MST_CODE, HASH)
284     }
285   case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
286   case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
287     switch (CodeID) {
288     default:
289       return None;
290       STRINGIFY_CODE(FS, PERMODULE)
291       STRINGIFY_CODE(FS, PERMODULE_PROFILE)
292       STRINGIFY_CODE(FS, PERMODULE_RELBF)
293       STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
294       STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS)
295       STRINGIFY_CODE(FS, COMBINED)
296       STRINGIFY_CODE(FS, COMBINED_PROFILE)
297       STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS)
298       STRINGIFY_CODE(FS, ALIAS)
299       STRINGIFY_CODE(FS, COMBINED_ALIAS)
300       STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME)
301       STRINGIFY_CODE(FS, VERSION)
302       STRINGIFY_CODE(FS, FLAGS)
303       STRINGIFY_CODE(FS, TYPE_TESTS)
304       STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS)
305       STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS)
306       STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL)
307       STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL)
308       STRINGIFY_CODE(FS, VALUE_GUID)
309       STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
310       STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
311       STRINGIFY_CODE(FS, TYPE_ID)
312       STRINGIFY_CODE(FS, TYPE_ID_METADATA)
313       STRINGIFY_CODE(FS, BLOCK_COUNT)
314       STRINGIFY_CODE(FS, PARAM_ACCESS)
315     }
316   case bitc::METADATA_ATTACHMENT_ID:
317     switch (CodeID) {
318     default:
319       return None;
320       STRINGIFY_CODE(METADATA, ATTACHMENT)
321     }
322   case bitc::METADATA_BLOCK_ID:
323     switch (CodeID) {
324     default:
325       return None;
326       STRINGIFY_CODE(METADATA, STRING_OLD)
327       STRINGIFY_CODE(METADATA, VALUE)
328       STRINGIFY_CODE(METADATA, NODE)
329       STRINGIFY_CODE(METADATA, NAME)
330       STRINGIFY_CODE(METADATA, DISTINCT_NODE)
331       STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
332       STRINGIFY_CODE(METADATA, LOCATION)
333       STRINGIFY_CODE(METADATA, OLD_NODE)
334       STRINGIFY_CODE(METADATA, OLD_FN_NODE)
335       STRINGIFY_CODE(METADATA, NAMED_NODE)
336       STRINGIFY_CODE(METADATA, GENERIC_DEBUG)
337       STRINGIFY_CODE(METADATA, SUBRANGE)
338       STRINGIFY_CODE(METADATA, ENUMERATOR)
339       STRINGIFY_CODE(METADATA, BASIC_TYPE)
340       STRINGIFY_CODE(METADATA, FILE)
341       STRINGIFY_CODE(METADATA, DERIVED_TYPE)
342       STRINGIFY_CODE(METADATA, COMPOSITE_TYPE)
343       STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE)
344       STRINGIFY_CODE(METADATA, COMPILE_UNIT)
345       STRINGIFY_CODE(METADATA, SUBPROGRAM)
346       STRINGIFY_CODE(METADATA, LEXICAL_BLOCK)
347       STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE)
348       STRINGIFY_CODE(METADATA, NAMESPACE)
349       STRINGIFY_CODE(METADATA, TEMPLATE_TYPE)
350       STRINGIFY_CODE(METADATA, TEMPLATE_VALUE)
351       STRINGIFY_CODE(METADATA, GLOBAL_VAR)
352       STRINGIFY_CODE(METADATA, LOCAL_VAR)
353       STRINGIFY_CODE(METADATA, EXPRESSION)
354       STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
355       STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
356       STRINGIFY_CODE(METADATA, MODULE)
357       STRINGIFY_CODE(METADATA, MACRO)
358       STRINGIFY_CODE(METADATA, MACRO_FILE)
359       STRINGIFY_CODE(METADATA, STRINGS)
360       STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT)
361       STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR)
362       STRINGIFY_CODE(METADATA, INDEX_OFFSET)
363       STRINGIFY_CODE(METADATA, INDEX)
364     }
365   case bitc::METADATA_KIND_BLOCK_ID:
366     switch (CodeID) {
367     default:
368       return None;
369       STRINGIFY_CODE(METADATA, KIND)
370     }
371   case bitc::USELIST_BLOCK_ID:
372     switch (CodeID) {
373     default:
374       return None;
375     case bitc::USELIST_CODE_DEFAULT:
376       return "USELIST_CODE_DEFAULT";
377     case bitc::USELIST_CODE_BB:
378       return "USELIST_CODE_BB";
379     }
380 
381   case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
382     switch (CodeID) {
383     default:
384       return None;
385     case bitc::OPERAND_BUNDLE_TAG:
386       return "OPERAND_BUNDLE_TAG";
387     }
388   case bitc::STRTAB_BLOCK_ID:
389     switch (CodeID) {
390     default:
391       return None;
392     case bitc::STRTAB_BLOB:
393       return "BLOB";
394     }
395   case bitc::SYMTAB_BLOCK_ID:
396     switch (CodeID) {
397     default:
398       return None;
399     case bitc::SYMTAB_BLOB:
400       return "BLOB";
401     }
402   }
403 #undef STRINGIFY_CODE
404 }
405 
406 static void printSize(raw_ostream &OS, double Bits) {
407   OS << format("%.2f/%.2fB/%luW", Bits, Bits / 8, (unsigned long)(Bits / 32));
408 }
409 static void printSize(raw_ostream &OS, uint64_t Bits) {
410   OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits / 8,
411                (unsigned long)(Bits / 32));
412 }
413 
414 static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) {
415   auto tryRead = [&Stream](char &Dest, size_t size) -> Error {
416     if (Expected<SimpleBitstreamCursor::word_t> MaybeWord = Stream.Read(size))
417       Dest = MaybeWord.get();
418     else
419       return MaybeWord.takeError();
420     return Error::success();
421   };
422 
423   char Signature[6];
424   if (Error Err = tryRead(Signature[0], 8))
425     return std::move(Err);
426   if (Error Err = tryRead(Signature[1], 8))
427     return std::move(Err);
428 
429   // Autodetect the file contents, if it is one we know.
430   if (Signature[0] == 'C' && Signature[1] == 'P') {
431     if (Error Err = tryRead(Signature[2], 8))
432       return std::move(Err);
433     if (Error Err = tryRead(Signature[3], 8))
434       return std::move(Err);
435     if (Signature[2] == 'C' && Signature[3] == 'H')
436       return ClangSerializedASTBitstream;
437   } else if (Signature[0] == 'D' && Signature[1] == 'I') {
438     if (Error Err = tryRead(Signature[2], 8))
439       return std::move(Err);
440     if (Error Err = tryRead(Signature[3], 8))
441       return std::move(Err);
442     if (Signature[2] == 'A' && Signature[3] == 'G')
443       return ClangSerializedDiagnosticsBitstream;
444   } else if (Signature[0] == 'R' && Signature[1] == 'M') {
445     if (Error Err = tryRead(Signature[2], 8))
446       return std::move(Err);
447     if (Error Err = tryRead(Signature[3], 8))
448       return std::move(Err);
449     if (Signature[2] == 'R' && Signature[3] == 'K')
450       return LLVMBitstreamRemarks;
451   } else {
452     if (Error Err = tryRead(Signature[2], 4))
453       return std::move(Err);
454     if (Error Err = tryRead(Signature[3], 4))
455       return std::move(Err);
456     if (Error Err = tryRead(Signature[4], 4))
457       return std::move(Err);
458     if (Error Err = tryRead(Signature[5], 4))
459       return std::move(Err);
460     if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 &&
461         Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD)
462       return LLVMIRBitstream;
463   }
464   return UnknownBitstream;
465 }
466 
467 static Expected<CurStreamTypeType> analyzeHeader(Optional<BCDumpOptions> O,
468                                                  BitstreamCursor &Stream) {
469   ArrayRef<uint8_t> Bytes = Stream.getBitcodeBytes();
470   const unsigned char *BufPtr = (const unsigned char *)Bytes.data();
471   const unsigned char *EndBufPtr = BufPtr + Bytes.size();
472 
473   // If we have a wrapper header, parse it and ignore the non-bc file
474   // contents. The magic number is 0x0B17C0DE stored in little endian.
475   if (isBitcodeWrapper(BufPtr, EndBufPtr)) {
476     if (Bytes.size() < BWH_HeaderSize)
477       return reportError("Invalid bitcode wrapper header");
478 
479     if (O) {
480       unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]);
481       unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]);
482       unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
483       unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
484       unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]);
485 
486       O->OS << "<BITCODE_WRAPPER_HEADER"
487             << " Magic=" << format_hex(Magic, 10)
488             << " Version=" << format_hex(Version, 10)
489             << " Offset=" << format_hex(Offset, 10)
490             << " Size=" << format_hex(Size, 10)
491             << " CPUType=" << format_hex(CPUType, 10) << "/>\n";
492     }
493 
494     if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
495       return reportError("Invalid bitcode wrapper header");
496   }
497 
498   // Use the cursor modified by skipping the wrapper header.
499   Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr));
500 
501   return ReadSignature(Stream);
502 }
503 
504 static bool canDecodeBlob(unsigned Code, unsigned BlockID) {
505   return BlockID == bitc::METADATA_BLOCK_ID && Code == bitc::METADATA_STRINGS;
506 }
507 
508 Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent,
509                                                  ArrayRef<uint64_t> Record,
510                                                  StringRef Blob,
511                                                  raw_ostream &OS) {
512   if (Blob.empty())
513     return reportError("Cannot decode empty blob.");
514 
515   if (Record.size() != 2)
516     return reportError(
517         "Decoding metadata strings blob needs two record entries.");
518 
519   unsigned NumStrings = Record[0];
520   unsigned StringsOffset = Record[1];
521   OS << " num-strings = " << NumStrings << " {\n";
522 
523   StringRef Lengths = Blob.slice(0, StringsOffset);
524   SimpleBitstreamCursor R(Lengths);
525   StringRef Strings = Blob.drop_front(StringsOffset);
526   do {
527     if (R.AtEndOfStream())
528       return reportError("bad length");
529 
530     Expected<uint32_t> MaybeSize = R.ReadVBR(6);
531     if (!MaybeSize)
532       return MaybeSize.takeError();
533     uint32_t Size = MaybeSize.get();
534     if (Strings.size() < Size)
535       return reportError("truncated chars");
536 
537     OS << Indent << "    '";
538     OS.write_escaped(Strings.slice(0, Size), /*hex=*/true);
539     OS << "'\n";
540     Strings = Strings.drop_front(Size);
541   } while (--NumStrings);
542 
543   OS << Indent << "  }";
544   return Error::success();
545 }
546 
547 BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer,
548                                  Optional<StringRef> BlockInfoBuffer)
549     : Stream(Buffer) {
550   if (BlockInfoBuffer)
551     BlockInfoStream.emplace(*BlockInfoBuffer);
552 }
553 
554 Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
555                                Optional<StringRef> CheckHash) {
556   Expected<CurStreamTypeType> MaybeType = analyzeHeader(O, Stream);
557   if (!MaybeType)
558     return MaybeType.takeError();
559   else
560     CurStreamType = *MaybeType;
561 
562   Stream.setBlockInfo(&BlockInfo);
563 
564   // Read block info from BlockInfoStream, if specified.
565   // The block info must be a top-level block.
566   if (BlockInfoStream) {
567     BitstreamCursor BlockInfoCursor(*BlockInfoStream);
568     Expected<CurStreamTypeType> H = analyzeHeader(O, BlockInfoCursor);
569     if (!H)
570       return H.takeError();
571 
572     while (!BlockInfoCursor.AtEndOfStream()) {
573       Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode();
574       if (!MaybeCode)
575         return MaybeCode.takeError();
576       if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
577         return reportError("Invalid record at top-level in block info file");
578 
579       Expected<unsigned> MaybeBlockID = BlockInfoCursor.ReadSubBlockID();
580       if (!MaybeBlockID)
581         return MaybeBlockID.takeError();
582       if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) {
583         Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
584             BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
585         if (!MaybeNewBlockInfo)
586           return MaybeNewBlockInfo.takeError();
587         Optional<BitstreamBlockInfo> NewBlockInfo =
588             std::move(MaybeNewBlockInfo.get());
589         if (!NewBlockInfo)
590           return reportError("Malformed BlockInfoBlock in block info file");
591         BlockInfo = std::move(*NewBlockInfo);
592         break;
593       }
594 
595       if (Error Err = BlockInfoCursor.SkipBlock())
596         return Err;
597     }
598   }
599 
600   // Parse the top-level structure.  We only allow blocks at the top-level.
601   while (!Stream.AtEndOfStream()) {
602     Expected<unsigned> MaybeCode = Stream.ReadCode();
603     if (!MaybeCode)
604       return MaybeCode.takeError();
605     if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
606       return reportError("Invalid record at top-level");
607 
608     Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID();
609     if (!MaybeBlockID)
610       return MaybeBlockID.takeError();
611 
612     if (Error E = parseBlock(MaybeBlockID.get(), 0, O, CheckHash))
613       return E;
614     ++NumTopBlocks;
615   }
616 
617   return Error::success();
618 }
619 
620 void BitcodeAnalyzer::printStats(BCDumpOptions O,
621                                  Optional<StringRef> Filename) {
622   uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT;
623   // Print a summary of the read file.
624   O.OS << "Summary ";
625   if (Filename)
626     O.OS << "of " << Filename->data() << ":\n";
627   O.OS << "         Total size: ";
628   printSize(O.OS, BufferSizeBits);
629   O.OS << "\n";
630   O.OS << "        Stream type: ";
631   switch (CurStreamType) {
632   case UnknownBitstream:
633     O.OS << "unknown\n";
634     break;
635   case LLVMIRBitstream:
636     O.OS << "LLVM IR\n";
637     break;
638   case ClangSerializedASTBitstream:
639     O.OS << "Clang Serialized AST\n";
640     break;
641   case ClangSerializedDiagnosticsBitstream:
642     O.OS << "Clang Serialized Diagnostics\n";
643     break;
644   case LLVMBitstreamRemarks:
645     O.OS << "LLVM Remarks\n";
646     break;
647   }
648   O.OS << "  # Toplevel Blocks: " << NumTopBlocks << "\n";
649   O.OS << "\n";
650 
651   // Emit per-block stats.
652   O.OS << "Per-block Summary:\n";
653   for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
654                                                      E = BlockIDStats.end();
655        I != E; ++I) {
656     O.OS << "  Block ID #" << I->first;
657     if (Optional<const char *> BlockName =
658             GetBlockName(I->first, BlockInfo, CurStreamType))
659       O.OS << " (" << *BlockName << ")";
660     O.OS << ":\n";
661 
662     const PerBlockIDStats &Stats = I->second;
663     O.OS << "      Num Instances: " << Stats.NumInstances << "\n";
664     O.OS << "         Total Size: ";
665     printSize(O.OS, Stats.NumBits);
666     O.OS << "\n";
667     double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
668     O.OS << "    Percent of file: " << format("%2.4f%%", pct) << "\n";
669     if (Stats.NumInstances > 1) {
670       O.OS << "       Average Size: ";
671       printSize(O.OS, Stats.NumBits / (double)Stats.NumInstances);
672       O.OS << "\n";
673       O.OS << "  Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
674            << Stats.NumSubBlocks / (double)Stats.NumInstances << "\n";
675       O.OS << "    Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
676            << Stats.NumAbbrevs / (double)Stats.NumInstances << "\n";
677       O.OS << "    Tot/Avg Records: " << Stats.NumRecords << "/"
678            << Stats.NumRecords / (double)Stats.NumInstances << "\n";
679     } else {
680       O.OS << "      Num SubBlocks: " << Stats.NumSubBlocks << "\n";
681       O.OS << "        Num Abbrevs: " << Stats.NumAbbrevs << "\n";
682       O.OS << "        Num Records: " << Stats.NumRecords << "\n";
683     }
684     if (Stats.NumRecords) {
685       double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
686       O.OS << "    Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
687     }
688     O.OS << "\n";
689 
690     // Print a histogram of the codes we see.
691     if (O.Histogram && !Stats.CodeFreq.empty()) {
692       std::vector<std::pair<unsigned, unsigned>> FreqPairs; // <freq,code>
693       for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
694         if (unsigned Freq = Stats.CodeFreq[i].NumInstances)
695           FreqPairs.push_back(std::make_pair(Freq, i));
696       llvm::stable_sort(FreqPairs);
697       std::reverse(FreqPairs.begin(), FreqPairs.end());
698 
699       O.OS << "\tRecord Histogram:\n";
700       O.OS << "\t\t  Count    # Bits     b/Rec   % Abv  Record Kind\n";
701       for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
702         const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
703 
704         O.OS << format("\t\t%7d %9lu", RecStats.NumInstances,
705                        (unsigned long)RecStats.TotalBits);
706 
707         if (RecStats.NumInstances > 1)
708           O.OS << format(" %9.1f",
709                          (double)RecStats.TotalBits / RecStats.NumInstances);
710         else
711           O.OS << "          ";
712 
713         if (RecStats.NumAbbrev)
714           O.OS << format(" %7.2f", (double)RecStats.NumAbbrev /
715                                        RecStats.NumInstances * 100);
716         else
717           O.OS << "        ";
718 
719         O.OS << "  ";
720         if (Optional<const char *> CodeName = GetCodeName(
721                 FreqPairs[i].second, I->first, BlockInfo, CurStreamType))
722           O.OS << *CodeName << "\n";
723         else
724           O.OS << "UnknownCode" << FreqPairs[i].second << "\n";
725       }
726       O.OS << "\n";
727     }
728   }
729 }
730 
731 Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
732                                   Optional<BCDumpOptions> O,
733                                   Optional<StringRef> CheckHash) {
734   std::string Indent(IndentLevel * 2, ' ');
735   uint64_t BlockBitStart = Stream.GetCurrentBitNo();
736 
737   // Get the statistics for this BlockID.
738   PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
739 
740   BlockStats.NumInstances++;
741 
742   // BLOCKINFO is a special part of the stream.
743   bool DumpRecords = O.hasValue();
744   if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
745     if (O)
746       O->OS << Indent << "<BLOCKINFO_BLOCK/>\n";
747     Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
748         Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
749     if (!MaybeNewBlockInfo)
750       return MaybeNewBlockInfo.takeError();
751     Optional<BitstreamBlockInfo> NewBlockInfo =
752         std::move(MaybeNewBlockInfo.get());
753     if (!NewBlockInfo)
754       return reportError("Malformed BlockInfoBlock");
755     BlockInfo = std::move(*NewBlockInfo);
756     if (Error Err = Stream.JumpToBit(BlockBitStart))
757       return Err;
758     // It's not really interesting to dump the contents of the blockinfo
759     // block.
760     DumpRecords = false;
761   }
762 
763   unsigned NumWords = 0;
764   if (Error Err = Stream.EnterSubBlock(BlockID, &NumWords))
765     return Err;
766 
767   // Keep it for later, when we see a MODULE_HASH record
768   uint64_t BlockEntryPos = Stream.getCurrentByteNo();
769 
770   Optional<const char *> BlockName = None;
771   if (DumpRecords) {
772     O->OS << Indent << "<";
773     if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType)))
774       O->OS << *BlockName;
775     else
776       O->OS << "UnknownBlock" << BlockID;
777 
778     if (!O->Symbolic && BlockName)
779       O->OS << " BlockID=" << BlockID;
780 
781     O->OS << " NumWords=" << NumWords
782           << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n";
783   }
784 
785   SmallVector<uint64_t, 64> Record;
786 
787   // Keep the offset to the metadata index if seen.
788   uint64_t MetadataIndexOffset = 0;
789 
790   // Read all the records for this block.
791   while (1) {
792     if (Stream.AtEndOfStream())
793       return reportError("Premature end of bitstream");
794 
795     uint64_t RecordStartBit = Stream.GetCurrentBitNo();
796 
797     Expected<BitstreamEntry> MaybeEntry =
798         Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
799     if (!MaybeEntry)
800       return MaybeEntry.takeError();
801     BitstreamEntry Entry = MaybeEntry.get();
802 
803     switch (Entry.Kind) {
804     case BitstreamEntry::Error:
805       return reportError("malformed bitcode file");
806     case BitstreamEntry::EndBlock: {
807       uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
808       BlockStats.NumBits += BlockBitEnd - BlockBitStart;
809       if (DumpRecords) {
810         O->OS << Indent << "</";
811         if (BlockName)
812           O->OS << *BlockName << ">\n";
813         else
814           O->OS << "UnknownBlock" << BlockID << ">\n";
815       }
816       return Error::success();
817     }
818 
819     case BitstreamEntry::SubBlock: {
820       uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
821       if (Error E = parseBlock(Entry.ID, IndentLevel + 1, O, CheckHash))
822         return E;
823       ++BlockStats.NumSubBlocks;
824       uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
825 
826       // Don't include subblock sizes in the size of this block.
827       BlockBitStart += SubBlockBitEnd - SubBlockBitStart;
828       continue;
829     }
830     case BitstreamEntry::Record:
831       // The interesting case.
832       break;
833     }
834 
835     if (Entry.ID == bitc::DEFINE_ABBREV) {
836       if (Error Err = Stream.ReadAbbrevRecord())
837         return Err;
838       ++BlockStats.NumAbbrevs;
839       continue;
840     }
841 
842     Record.clear();
843 
844     ++BlockStats.NumRecords;
845 
846     StringRef Blob;
847     uint64_t CurrentRecordPos = Stream.GetCurrentBitNo();
848     Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record, &Blob);
849     if (!MaybeCode)
850       return MaybeCode.takeError();
851     unsigned Code = MaybeCode.get();
852 
853     // Increment the # occurrences of this code.
854     if (BlockStats.CodeFreq.size() <= Code)
855       BlockStats.CodeFreq.resize(Code + 1);
856     BlockStats.CodeFreq[Code].NumInstances++;
857     BlockStats.CodeFreq[Code].TotalBits +=
858         Stream.GetCurrentBitNo() - RecordStartBit;
859     if (Entry.ID != bitc::UNABBREV_RECORD) {
860       BlockStats.CodeFreq[Code].NumAbbrev++;
861       ++BlockStats.NumAbbreviatedRecords;
862     }
863 
864     if (DumpRecords) {
865       O->OS << Indent << "  <";
866       Optional<const char *> CodeName =
867           GetCodeName(Code, BlockID, BlockInfo, CurStreamType);
868       if (CodeName)
869         O->OS << *CodeName;
870       else
871         O->OS << "UnknownCode" << Code;
872       if (!O->Symbolic && CodeName)
873         O->OS << " codeid=" << Code;
874       const BitCodeAbbrev *Abbv = nullptr;
875       if (Entry.ID != bitc::UNABBREV_RECORD) {
876         Abbv = Stream.getAbbrev(Entry.ID);
877         O->OS << " abbrevid=" << Entry.ID;
878       }
879 
880       for (unsigned i = 0, e = Record.size(); i != e; ++i)
881         O->OS << " op" << i << "=" << (int64_t)Record[i];
882 
883       // If we found a metadata index, let's verify that we had an offset
884       // before and validate its forward reference offset was correct!
885       if (BlockID == bitc::METADATA_BLOCK_ID) {
886         if (Code == bitc::METADATA_INDEX_OFFSET) {
887           if (Record.size() != 2)
888             O->OS << "(Invalid record)";
889           else {
890             auto Offset = Record[0] + (Record[1] << 32);
891             MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset;
892           }
893         }
894         if (Code == bitc::METADATA_INDEX) {
895           O->OS << " (offset ";
896           if (MetadataIndexOffset == RecordStartBit)
897             O->OS << "match)";
898           else
899             O->OS << "mismatch: " << MetadataIndexOffset << " vs "
900                   << RecordStartBit << ")";
901         }
902       }
903 
904       // If we found a module hash, let's verify that it matches!
905       if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH &&
906           CheckHash.hasValue()) {
907         if (Record.size() != 5)
908           O->OS << " (invalid)";
909         else {
910           // Recompute the hash and compare it to the one in the bitcode
911           SHA1 Hasher;
912           StringRef Hash;
913           Hasher.update(*CheckHash);
914           {
915             int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
916             auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
917             Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
918             Hash = Hasher.result();
919           }
920           std::array<char, 20> RecordedHash;
921           int Pos = 0;
922           for (auto &Val : Record) {
923             assert(!(Val >> 32) && "Unexpected high bits set");
924             support::endian::write32be(&RecordedHash[Pos], Val);
925             Pos += 4;
926           }
927           if (Hash == StringRef(RecordedHash.data(), RecordedHash.size()))
928             O->OS << " (match)";
929           else
930             O->OS << " (!mismatch!)";
931         }
932       }
933 
934       O->OS << "/>";
935 
936       if (Abbv) {
937         for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
938           const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
939           if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array)
940             continue;
941           assert(i + 2 == e && "Array op not second to last");
942           std::string Str;
943           bool ArrayIsPrintable = true;
944           for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
945             if (!isPrint(static_cast<unsigned char>(Record[j]))) {
946               ArrayIsPrintable = false;
947               break;
948             }
949             Str += (char)Record[j];
950           }
951           if (ArrayIsPrintable)
952             O->OS << " record string = '" << Str << "'";
953           break;
954         }
955       }
956 
957       if (Blob.data()) {
958         if (canDecodeBlob(Code, BlockID)) {
959           if (Error E = decodeMetadataStringsBlob(Indent, Record, Blob, O->OS))
960             return E;
961         } else {
962           O->OS << " blob data = ";
963           if (O->ShowBinaryBlobs) {
964             O->OS << "'";
965             O->OS.write_escaped(Blob, /*hex=*/true) << "'";
966           } else {
967             bool BlobIsPrintable = true;
968             for (unsigned i = 0, e = Blob.size(); i != e; ++i)
969               if (!isPrint(static_cast<unsigned char>(Blob[i]))) {
970                 BlobIsPrintable = false;
971                 break;
972               }
973 
974             if (BlobIsPrintable)
975               O->OS << "'" << Blob << "'";
976             else
977               O->OS << "unprintable, " << Blob.size() << " bytes.";
978           }
979         }
980       }
981 
982       O->OS << "\n";
983     }
984 
985     // Make sure that we can skip the current record.
986     if (Error Err = Stream.JumpToBit(CurrentRecordPos))
987       return Err;
988     if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
989       ; // Do nothing.
990     else
991       return Skipped.takeError();
992   }
993 }
994 
995