1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines interfaces to read LLVM bitcode files/streams.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_BITCODE_BITCODEREADER_H
14 #define LLVM_BITCODE_BITCODEREADER_H
15
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Bitstream/BitCodeEnums.h"
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/Error.h"
22 #include "llvm/Support/ErrorOr.h"
23 #include "llvm/Support/MemoryBufferRef.h"
24 #include <cstdint>
25 #include <memory>
26 #include <optional>
27 #include <string>
28 #include <system_error>
29 #include <vector>
30 namespace llvm {
31
32 class LLVMContext;
33 class Module;
34 class MemoryBuffer;
35 class Metadata;
36 class ModuleSummaryIndex;
37 class Type;
38 class Value;
39
40 // Callback to override the data layout string of an imported bitcode module.
41 // The first argument is the target triple, the second argument the data layout
42 // string from the input, or a default string. It will be used if the callback
43 // returns std::nullopt.
44 typedef std::function<std::optional<std::string>(StringRef, StringRef)>
45 DataLayoutCallbackFuncTy;
46
47 typedef std::function<Type *(unsigned)> GetTypeByIDTy;
48
49 typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy;
50
51 typedef std::function<void(Value *, unsigned, GetTypeByIDTy,
52 GetContainedTypeIDTy)>
53 ValueTypeCallbackTy;
54
55 typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy,
56 GetContainedTypeIDTy)>
57 MDTypeCallbackTy;
58
59 // These functions are for converting Expected/Error values to
60 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
61 // Remove these functions once no longer needed by the C and libLTO APIs.
62
63 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
64
65 template <typename T>
expectedToErrorOrAndEmitErrors(LLVMContext & Ctx,Expected<T> Val)66 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
67 if (!Val)
68 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
69 return std::move(*Val);
70 }
71
72 struct ParserCallbacks {
73 std::optional<DataLayoutCallbackFuncTy> DataLayout;
74 /// The ValueType callback is called for every function definition or
75 /// declaration and allows accessing the type information, also behind
76 /// pointers. This can be useful, when the opaque pointer upgrade cleans all
77 /// type information behind pointers.
78 /// The second argument to ValueTypeCallback is the type ID of the
79 /// function, the two passed functions can be used to extract type
80 /// information.
81 std::optional<ValueTypeCallbackTy> ValueType;
82 /// The MDType callback is called for every value in metadata.
83 std::optional<MDTypeCallbackTy> MDType;
84
85 ParserCallbacks() = default;
ParserCallbacksParserCallbacks86 explicit ParserCallbacks(DataLayoutCallbackFuncTy DataLayout)
87 : DataLayout(DataLayout) {}
88 };
89
90 struct BitcodeFileContents;
91
/// Basic information extracted from a bitcode module to be used for LTO.
///
/// Every flag defaults to false so that a default-constructed object never
/// exposes indeterminate values. The struct is still an aggregate, so
/// brace-initialization with explicit values keeps working.
struct BitcodeLTOInfo {
  /// Whether to compile with ThinLTO.
  bool IsThinLTO = false;
  /// Whether the module has a summary.
  bool HasSummary = false;
  /// NOTE(review): presumably whether split LTO units are enabled for the
  /// module; confirm against the code that fills this in.
  bool EnableSplitLTOUnit = false;
};
98
99 /// Represents a module in a bitcode file.
100 class BitcodeModule {
101 // This covers the identification (if present) and module blocks.
102 ArrayRef<uint8_t> Buffer;
103 StringRef ModuleIdentifier;
104
105 // The string table used to interpret this module.
106 StringRef Strtab;
107
108 // The bitstream location of the IDENTIFICATION_BLOCK.
109 uint64_t IdentificationBit;
110
111 // The bitstream location of this module's MODULE_BLOCK.
112 uint64_t ModuleBit;
113
BitcodeModule(ArrayRef<uint8_t> Buffer,StringRef ModuleIdentifier,uint64_t IdentificationBit,uint64_t ModuleBit)114 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
115 uint64_t IdentificationBit, uint64_t ModuleBit)
116 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
117 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
118
119 // Calls the ctor.
120 friend Expected<BitcodeFileContents>
121 getBitcodeFileContents(MemoryBufferRef Buffer);
122
123 Expected<std::unique_ptr<Module>>
124 getModuleImpl(LLVMContext &Context, bool MaterializeAll,
125 bool ShouldLazyLoadMetadata, bool IsImporting,
126 ParserCallbacks Callbacks = {});
127
128 public:
getBuffer()129 StringRef getBuffer() const {
130 return StringRef((const char *)Buffer.begin(), Buffer.size());
131 }
132
getStrtab()133 StringRef getStrtab() const { return Strtab; }
134
getModuleIdentifier()135 StringRef getModuleIdentifier() const { return ModuleIdentifier; }
136
137 /// Read the bitcode module and prepare for lazy deserialization of function
138 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
139 /// If IsImporting is true, this module is being parsed for ThinLTO
140 /// importing into another module.
141 Expected<std::unique_ptr<Module>>
142 getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
143 bool IsImporting, ParserCallbacks Callbacks = {});
144
145 /// Read the entire bitcode module and return it.
146 Expected<std::unique_ptr<Module>>
147 parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {});
148
149 /// Returns information about the module to be used for LTO: whether to
150 /// compile with ThinLTO, and whether it has a summary.
151 Expected<BitcodeLTOInfo> getLTOInfo();
152
153 /// Parse the specified bitcode buffer, returning the module summary index.
154 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
155
156 /// Parse the specified bitcode buffer and merge its module summary index
157 /// into CombinedIndex.
158 Error
159 readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
160 uint64_t ModuleId,
161 std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
162 };
163
164 struct BitcodeFileContents {
165 std::vector<BitcodeModule> Mods;
166 StringRef Symtab, StrtabForSymtab;
167 };
168
169 /// Returns the contents of a bitcode file. This includes the raw contents of
170 /// the symbol table embedded in the bitcode file. Clients which require a
171 /// symbol table should prefer to use irsymtab::read instead of this function
172 /// because it creates a reader for the irsymtab and handles upgrading bitcode
173 /// files without a symbol table or with an old symbol table.
174 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
175
176 /// Returns a list of modules in the specified bitcode buffer.
177 Expected<std::vector<BitcodeModule>>
178 getBitcodeModuleList(MemoryBufferRef Buffer);
179
180 /// Read the header of the specified bitcode buffer and prepare for lazy
181 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
182 /// lazily load metadata as well. If IsImporting is true, this module is
183 /// being parsed for ThinLTO importing into another module.
184 Expected<std::unique_ptr<Module>>
185 getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
186 bool ShouldLazyLoadMetadata = false,
187 bool IsImporting = false,
188 ParserCallbacks Callbacks = {});
189
190 /// Like getLazyBitcodeModule, except that the module takes ownership of
191 /// the memory buffer if successful. If successful, this moves Buffer. On
192 /// error, this *does not* move Buffer. If IsImporting is true, this module is
193 /// being parsed for ThinLTO importing into another module.
194 Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
195 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
196 bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
197 ParserCallbacks Callbacks = {});
198
199 /// Read the header of the specified bitcode buffer and extract just the
200 /// triple information. If successful, this returns a string. On error, this
201 /// returns "".
202 Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
203
204 /// Return true if \p Buffer contains a bitcode file with ObjC code (category
205 /// or class) in it.
206 Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
207
208 /// Read the header of the specified bitcode buffer and extract just the
209 /// producer string information. If successful, this returns a string. On
210 /// error, this returns "".
211 Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
212
213 /// Read the specified bitcode file, returning the module.
214 Expected<std::unique_ptr<Module>>
215 parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
216 ParserCallbacks Callbacks = {});
217
218 /// Returns LTO information for the specified bitcode file.
219 Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
220
221 /// Parse the specified bitcode buffer, returning the module summary index.
222 Expected<std::unique_ptr<ModuleSummaryIndex>>
223 getModuleSummaryIndex(MemoryBufferRef Buffer);
224
225 /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
226 Error readModuleSummaryIndex(MemoryBufferRef Buffer,
227 ModuleSummaryIndex &CombinedIndex,
228 uint64_t ModuleId);
229
230 /// Parse the module summary index out of an IR file and return the module
231 /// summary index object if found, or an empty summary if not. If Path refers
232 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
233 /// this function will return nullptr.
234 Expected<std::unique_ptr<ModuleSummaryIndex>>
235 getModuleSummaryIndexForFile(StringRef Path,
236 bool IgnoreEmptyThinLTOIndexFile = false);
237
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr first byte of the range to inspect.
/// \param BufEnd one past the last byte of the range.
/// \returns true only if the range holds at least four bytes and the first
///          four are DE C0 17 0B; shorter ranges (including empty ones)
///          never match.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // All four magic bytes are examined below, so all four must be inside the
  // range. Checking only for "non-empty" would read past the end of a
  // truncated buffer.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
250
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr first byte of the range to inspect.
/// \param BufEnd one past the last byte of the range.
/// \returns true only if the range holds at least four bytes and the first
///          four are 'B' 'C' C0 DE; shorter ranges (including empty ones)
///          never match.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // All four magic bytes are examined below, so all four must be inside the
  // range. Checking only for "non-empty" would read past the end of a
  // truncated buffer.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' && BufPtr[1] == 'C' && BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
263
264 /// isBitcode - Return true if the given bytes are the magic bytes for
265 /// LLVM IR bitcode, either with or without a wrapper.
isBitcode(const unsigned char * BufPtr,const unsigned char * BufEnd)266 inline bool isBitcode(const unsigned char *BufPtr,
267 const unsigned char *BufEnd) {
268 return isBitcodeWrapper(BufPtr, BufEnd) ||
269 isRawBitcode(BufPtr, BufEnd);
270 }
271
272 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
273 /// header for padding or other reasons. The format of this header is:
274 ///
275 /// struct bc_header {
276 /// uint32_t Magic; // 0x0B17C0DE
277 /// uint32_t Version; // Version, currently always 0.
278 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
279 /// uint32_t BitcodeSize; // Size of traditional bitcode file.
280 /// ... potentially other gunk ...
281 /// };
282 ///
283 /// This function is called when we find a file with a matching magic number.
284 /// In this case, skip down to the subsection of the file that is actually a
285 /// BC file.
286 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
287 /// contain the whole bitcode file.
SkipBitcodeWrapperHeader(const unsigned char * & BufPtr,const unsigned char * & BufEnd,bool VerifyBufferSize)288 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
289 const unsigned char *&BufEnd,
290 bool VerifyBufferSize) {
291 // Must contain the offset and size field!
292 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
293 return true;
294
295 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
296 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
297 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
298
299 // Verify that Offset+Size fits in the file.
300 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
301 return true;
302 BufPtr += Offset;
303 BufEnd = BufPtr+Size;
304 return false;
305 }
306
307 APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
308
309 const std::error_category &BitcodeErrorCategory();
310 enum class BitcodeError { CorruptedBitcode = 1 };
make_error_code(BitcodeError E)311 inline std::error_code make_error_code(BitcodeError E) {
312 return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
313 }
314
315 } // end namespace llvm
316
317 namespace std {
318
319 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
320
321 } // end namespace std
322
323 #endif // LLVM_BITCODE_BITCODEREADER_H
324