1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines interfaces to read LLVM bitcode files/streams.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_BITCODE_BITCODEREADER_H
14 #define LLVM_BITCODE_BITCODEREADER_H
15 
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitstream/BitCodeEnums.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBufferRef.h"
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
30 namespace llvm {
31 
32 class LLVMContext;
33 class Module;
34 class MemoryBuffer;
35 class Metadata;
36 class ModuleSummaryIndex;
37 class Type;
38 class Value;
39 
40 // Callback to override the data layout string of an imported bitcode module.
41 // The first argument is the target triple, the second argument the data layout
42 // string from the input, or a default string. It will be used if the callback
43 // returns std::nullopt.
44 typedef std::function<std::optional<std::string>(StringRef, StringRef)>
45     DataLayoutCallbackFuncTy;
46 
47 typedef std::function<Type *(unsigned)> GetTypeByIDTy;
48 
49 typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy;
50 
51 typedef std::function<void(Value *, unsigned, GetTypeByIDTy,
52                            GetContainedTypeIDTy)>
53     ValueTypeCallbackTy;
54 
55 typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy,
56                            GetContainedTypeIDTy)>
57     MDTypeCallbackTy;
58 
// These functions are for converting Expected/Error values to
// ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
// Remove these functions once no longer needed by the C and libLTO APIs.

/// Converts \p Err into a std::error_code. Only the declaration is visible in
/// this header; judging by the name, the error is also reported through
/// \p Ctx — confirm against the definition.
std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
64 
65 template <typename T>
66 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
67   if (!Val)
68     return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
69   return std::move(*Val);
70 }
71 
72 struct ParserCallbacks {
73   std::optional<DataLayoutCallbackFuncTy> DataLayout;
74   /// The ValueType callback is called for every function definition or
75   /// declaration and allows accessing the type information, also behind
76   /// pointers. This can be useful, when the opaque pointer upgrade cleans all
77   /// type information behind pointers.
78   /// The second argument to ValueTypeCallback is the type ID of the
79   /// function, the two passed functions can be used to extract type
80   /// information.
81   std::optional<ValueTypeCallbackTy> ValueType;
82   /// The MDType callback is called for every value in metadata.
83   std::optional<MDTypeCallbackTy> MDType;
84 
85   ParserCallbacks() = default;
86   explicit ParserCallbacks(DataLayoutCallbackFuncTy DataLayout)
87       : DataLayout(DataLayout) {}
88 };
89 
90   struct BitcodeFileContents;
91 
/// Basic information extracted from a bitcode module to be used for LTO.
///
/// Every flag defaults to false, so a default-initialized object never holds
/// indeterminate values. The type remains an aggregate, so brace
/// initialization in member order keeps working.
struct BitcodeLTOInfo {
  /// Whether the module should be compiled with ThinLTO.
  bool IsThinLTO = false;
  /// Whether the module has a summary.
  bool HasSummary = false;
  // NOTE(review): the meaning of the next two flags is not described in this
  // header; see the reader implementation.
  bool EnableSplitLTOUnit = false;
  bool UnifiedLTO = false;
};
99 
100   /// Represents a module in a bitcode file.
101   class BitcodeModule {
102     // This covers the identification (if present) and module blocks.
103     ArrayRef<uint8_t> Buffer;
104     StringRef ModuleIdentifier;
105 
106     // The string table used to interpret this module.
107     StringRef Strtab;
108 
109     // The bitstream location of the IDENTIFICATION_BLOCK.
110     uint64_t IdentificationBit;
111 
112     // The bitstream location of this module's MODULE_BLOCK.
113     uint64_t ModuleBit;
114 
115     BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
116                   uint64_t IdentificationBit, uint64_t ModuleBit)
117         : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
118           IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
119 
120     // Calls the ctor.
121     friend Expected<BitcodeFileContents>
122     getBitcodeFileContents(MemoryBufferRef Buffer);
123 
124     Expected<std::unique_ptr<Module>>
125     getModuleImpl(LLVMContext &Context, bool MaterializeAll,
126                   bool ShouldLazyLoadMetadata, bool IsImporting,
127                   ParserCallbacks Callbacks = {});
128 
129   public:
130     StringRef getBuffer() const {
131       return StringRef((const char *)Buffer.begin(), Buffer.size());
132     }
133 
134     StringRef getStrtab() const { return Strtab; }
135 
136     StringRef getModuleIdentifier() const { return ModuleIdentifier; }
137 
138     /// Read the bitcode module and prepare for lazy deserialization of function
139     /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
140     /// If IsImporting is true, this module is being parsed for ThinLTO
141     /// importing into another module.
142     Expected<std::unique_ptr<Module>>
143     getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
144                   bool IsImporting, ParserCallbacks Callbacks = {});
145 
146     /// Read the entire bitcode module and return it.
147     Expected<std::unique_ptr<Module>>
148     parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {});
149 
150     /// Returns information about the module to be used for LTO: whether to
151     /// compile with ThinLTO, and whether it has a summary.
152     Expected<BitcodeLTOInfo> getLTOInfo();
153 
154     /// Parse the specified bitcode buffer, returning the module summary index.
155     Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
156 
157     /// Parse the specified bitcode buffer and merge its module summary index
158     /// into CombinedIndex.
159     Error
160     readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
161                 uint64_t ModuleId,
162                 std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
163   };
164 
165   struct BitcodeFileContents {
166     std::vector<BitcodeModule> Mods;
167     StringRef Symtab, StrtabForSymtab;
168   };
169 
/// Returns the contents of a bitcode file. This includes the raw contents of
/// the symbol table embedded in the bitcode file. Clients which require a
/// symbol table should prefer to use irsymtab::read instead of this function
/// because it creates a reader for the irsymtab and handles upgrading bitcode
/// files without a symbol table or with an old symbol table.
///
/// \param Buffer the bitcode buffer to inspect.
/// \returns the file contents on success; the Expected wrapper carries an
/// Error otherwise.
Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);

/// Returns a list of modules in the specified bitcode buffer.
///
/// \param Buffer the bitcode buffer to inspect.
/// \returns the modules on success; the Expected wrapper carries an Error
/// otherwise.
Expected<std::vector<BitcodeModule>>
getBitcodeModuleList(MemoryBufferRef Buffer);
180 
/// Read the header of the specified bitcode buffer and prepare for lazy
/// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
/// lazily load metadata as well. If IsImporting is true, this module is
/// being parsed for ThinLTO importing into another module.
///
/// \param Buffer the bitcode buffer to read from.
/// \param Context the LLVMContext passed to the reader.
/// \param ShouldLazyLoadMetadata defaults to false.
/// \param IsImporting defaults to false.
/// \param Callbacks optional parser hooks; the default supplies none.
/// \returns the module on success; the Expected wrapper carries an Error
/// otherwise.
Expected<std::unique_ptr<Module>>
getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
                     bool ShouldLazyLoadMetadata = false,
                     bool IsImporting = false,
                     ParserCallbacks Callbacks = {});

/// Like getLazyBitcodeModule, except that the module takes ownership of
/// the memory buffer if successful. If successful, this moves Buffer. On
/// error, this *does not* move Buffer. If IsImporting is true, this module is
/// being parsed for ThinLTO importing into another module.
///
/// \param Buffer the owning buffer; see above for when it is moved from.
/// \param Context the LLVMContext passed to the reader.
/// \param ShouldLazyLoadMetadata defaults to false.
/// \param IsImporting defaults to false.
/// \param Callbacks optional parser hooks; the default supplies none.
Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
    std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
    bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
    ParserCallbacks Callbacks = {});
199 
/// Read the header of the specified bitcode buffer and extract just the
/// triple information. If successful, this returns a string.
/// NOTE(review): this comment used to state that an error yields "", but the
/// declared return type is Expected<std::string>, which can carry an Error.
/// Which of the two a failure produces is not visible from this header —
/// confirm against the definition.
Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);

/// Return true if \p Buffer contains a bitcode file with ObjC code (category
/// or class) in it.
Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);

/// Read the header of the specified bitcode buffer and extract just the
/// producer string information. If successful, this returns a string.
/// NOTE(review): this comment used to state that an error yields "", but the
/// declared return type is Expected<std::string>, which can carry an Error.
/// Which of the two a failure produces is not visible from this header —
/// confirm against the definition.
Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
213 
/// Read the specified bitcode file, returning the module.
///
/// \param Buffer the bitcode to parse.
/// \param Context the LLVMContext passed to the reader.
/// \param Callbacks optional parser hooks; the default supplies none.
/// \returns the module on success; the Expected wrapper carries an Error
/// otherwise.
Expected<std::unique_ptr<Module>>
parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
                 ParserCallbacks Callbacks = {});

/// Returns LTO information for the specified bitcode file.
Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);

/// Parse the specified bitcode buffer, returning the module summary index.
Expected<std::unique_ptr<ModuleSummaryIndex>>
getModuleSummaryIndex(MemoryBufferRef Buffer);

/// Parse the specified bitcode buffer and merge the index into CombinedIndex.
///
/// \param Buffer the bitcode buffer to parse.
/// \param CombinedIndex the index that receives the merged summary.
/// \param ModuleId presumably identifies this module within CombinedIndex —
/// confirm against the definition.
Error readModuleSummaryIndex(MemoryBufferRef Buffer,
                             ModuleSummaryIndex &CombinedIndex,
                             uint64_t ModuleId);

/// Parse the module summary index out of an IR file and return the module
/// summary index object if found, or an empty summary if not. If Path refers
/// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
/// this function will return nullptr.
///
/// \param Path the file to read.
/// \param IgnoreEmptyThinLTOIndexFile defaults to false.
Expected<std::unique_ptr<ModuleSummaryIndex>>
getModuleSummaryIndexForFile(StringRef Path,
                             bool IgnoreEmptyThinLTOIndexFile = false);
238 
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr first byte of the buffer.
/// \param BufEnd one past the last byte of the buffer.
/// \returns true only when [BufPtr, BufEnd) holds at least four bytes and the
/// first four are 0xDE 0xC0 0x17 0x0B; false otherwise.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // Up to four bytes are inspected below, so four readable bytes are
  // required. A mere non-empty check would let a 1-3 byte buffer that starts
  // with a magic prefix be read past its end.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE &&
         BufPtr[1] == 0xC0 &&
         BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
251 
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr first byte of the buffer.
/// \param BufEnd one past the last byte of the buffer.
/// \returns true only when [BufPtr, BufEnd) holds at least four bytes and the
/// first four are 'B' 'C' 0xC0 0xDE; false otherwise.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // Up to four bytes are inspected below, so four readable bytes are
  // required. A mere non-empty check would let a 1-3 byte buffer that starts
  // with a magic prefix be read past its end.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' &&
         BufPtr[1] == 'C' &&
         BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
264 
265   /// isBitcode - Return true if the given bytes are the magic bytes for
266   /// LLVM IR bitcode, either with or without a wrapper.
267   inline bool isBitcode(const unsigned char *BufPtr,
268                         const unsigned char *BufEnd) {
269     return isBitcodeWrapper(BufPtr, BufEnd) ||
270            isRawBitcode(BufPtr, BufEnd);
271   }
272 
273   /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
274   /// header for padding or other reasons.  The format of this header is:
275   ///
276   /// struct bc_header {
277   ///   uint32_t Magic;         // 0x0B17C0DE
278   ///   uint32_t Version;       // Version, currently always 0.
279   ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
280   ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
281   ///   ... potentially other gunk ...
282   /// };
283   ///
284   /// This function is called when we find a file with a matching magic number.
285   /// In this case, skip down to the subsection of the file that is actually a
286   /// BC file.
287   /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
288   /// contain the whole bitcode file.
289   inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
290                                        const unsigned char *&BufEnd,
291                                        bool VerifyBufferSize) {
292     // Must contain the offset and size field!
293     if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
294       return true;
295 
296     unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
297     unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
298     uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
299 
300     // Verify that Offset+Size fits in the file.
301     if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
302       return true;
303     BufPtr += Offset;
304     BufEnd = BufPtr+Size;
305     return false;
306   }
307 
/// NOTE(review): only the declaration is visible in this header. Judging by
/// the signature, this builds an APInt from the 64-bit words in \p Vals, with
/// \p TypeBits presumably giving the bit width — confirm against the
/// definition.
APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
309 
/// Returns the std::error_category that BitcodeError codes are paired with.
/// Only declared in this header.
const std::error_category &BitcodeErrorCategory();

/// Error values reported by the bitcode reader.
enum class BitcodeError { CorruptedBitcode = 1 };

/// Builds a std::error_code from \p E: the enumerator's integer value paired
/// with BitcodeErrorCategory().
inline std::error_code make_error_code(BitcodeError E) {
  const int Code = static_cast<int>(E);
  return std::error_code(Code, BitcodeErrorCategory());
}
315 
316 } // end namespace llvm
317 
namespace std {

// Marks llvm::BitcodeError as an error-code enumeration, which enables its
// implicit conversion to std::error_code (through make_error_code).
template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};

} // end namespace std
323 
324 #endif // LLVM_BITCODE_BITCODEREADER_H
325