1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines interfaces to read LLVM bitcode files/streams.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_BITCODE_BITCODEREADER_H
14 #define LLVM_BITCODE_BITCODEREADER_H
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Bitstream/BitCodeEnums.h"
19 #include "llvm/Support/Endian.h"
20 #include "llvm/Support/Error.h"
21 #include "llvm/Support/ErrorOr.h"
22 #include "llvm/Support/MemoryBufferRef.h"
23 #include <cstdint>
24 #include <memory>
25 #include <string>
26 #include <system_error>
27 #include <vector>
28 namespace llvm {
29 
30 class LLVMContext;
31 class Module;
32 class MemoryBuffer;
33 class ModuleSummaryIndex;
34 
35 typedef llvm::function_ref<Optional<std::string>(StringRef)>
36     DataLayoutCallbackTy;
37 
  // These functions are for converting Expected/Error values to
  // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
  // Remove these functions once no longer needed by the C and libLTO APIs.

  /// Converts \p Err into a std::error_code. Only the declaration is visible
  /// in this header; judging by the name, the errors are presumably also
  /// reported through \p Ctx -- confirm against the definition.
  std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
43 
44   template <typename T>
45   ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
46     if (!Val)
47       return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
48     return std::move(*Val);
49   }
50 
51   struct BitcodeFileContents;
52 
53   /// Basic information extracted from a bitcode module to be used for LTO.
54   struct BitcodeLTOInfo {
55     bool IsThinLTO;
56     bool HasSummary;
57     bool EnableSplitLTOUnit;
58   };
59 
60   /// Represents a module in a bitcode file.
61   class BitcodeModule {
62     // This covers the identification (if present) and module blocks.
63     ArrayRef<uint8_t> Buffer;
64     StringRef ModuleIdentifier;
65 
66     // The string table used to interpret this module.
67     StringRef Strtab;
68 
69     // The bitstream location of the IDENTIFICATION_BLOCK.
70     uint64_t IdentificationBit;
71 
72     // The bitstream location of this module's MODULE_BLOCK.
73     uint64_t ModuleBit;
74 
75     BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
76                   uint64_t IdentificationBit, uint64_t ModuleBit)
77         : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
78           IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
79 
80     // Calls the ctor.
81     friend Expected<BitcodeFileContents>
82     getBitcodeFileContents(MemoryBufferRef Buffer);
83 
84     Expected<std::unique_ptr<Module>>
85     getModuleImpl(LLVMContext &Context, bool MaterializeAll,
86                   bool ShouldLazyLoadMetadata, bool IsImporting,
87                   DataLayoutCallbackTy DataLayoutCallback);
88 
89   public:
90     StringRef getBuffer() const {
91       return StringRef((const char *)Buffer.begin(), Buffer.size());
92     }
93 
94     StringRef getStrtab() const { return Strtab; }
95 
96     StringRef getModuleIdentifier() const { return ModuleIdentifier; }
97 
98     /// Read the bitcode module and prepare for lazy deserialization of function
99     /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
100     /// If IsImporting is true, this module is being parsed for ThinLTO
101     /// importing into another module.
102     Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
103                                                     bool ShouldLazyLoadMetadata,
104                                                     bool IsImporting);
105 
106     /// Read the entire bitcode module and return it.
107     Expected<std::unique_ptr<Module>> parseModule(
108         LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback =
109                                   [](StringRef) { return None; });
110 
111     /// Returns information about the module to be used for LTO: whether to
112     /// compile with ThinLTO, and whether it has a summary.
113     Expected<BitcodeLTOInfo> getLTOInfo();
114 
115     /// Parse the specified bitcode buffer, returning the module summary index.
116     Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
117 
118     /// Parse the specified bitcode buffer and merge its module summary index
119     /// into CombinedIndex.
120     Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
121                       uint64_t ModuleId);
122   };
123 
124   struct BitcodeFileContents {
125     std::vector<BitcodeModule> Mods;
126     StringRef Symtab, StrtabForSymtab;
127   };
128 
  /// Returns the contents of a bitcode file. This includes the raw contents of
  /// the symbol table embedded in the bitcode file. Clients which require a
  /// symbol table should prefer to use irsymtab::read instead of this function
  /// because it creates a reader for the irsymtab and handles upgrading bitcode
  /// files without a symbol table or with an old symbol table.
  ///
  /// \param Buffer the bitcode buffer to inspect.
  /// \returns the file contents on success; failures are reported through the
  /// returned Expected.
  Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
135 
  /// Returns a list of modules in the specified bitcode buffer.
  ///
  /// \param Buffer the bitcode buffer to inspect.
  /// \returns the modules on success; failures are reported through the
  /// returned Expected.
  Expected<std::vector<BitcodeModule>>
  getBitcodeModuleList(MemoryBufferRef Buffer);
139 
  /// Read the header of the specified bitcode buffer and prepare for lazy
  /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
  /// lazily load metadata as well. If IsImporting is true, this module is
  /// being parsed for ThinLTO importing into another module.
  ///
  /// Both flags default to false. Failures are reported through the returned
  /// Expected.
  Expected<std::unique_ptr<Module>>
  getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
                       bool ShouldLazyLoadMetadata = false,
                       bool IsImporting = false);
148 
  /// Like getLazyBitcodeModule, except that the module takes ownership of
  /// the memory buffer if successful. If successful, this moves Buffer. On
  /// error, this *does not* move Buffer. If IsImporting is true, this module is
  /// being parsed for ThinLTO importing into another module.
  ///
  /// Both flags default to false. Failures are reported through the returned
  /// Expected.
  Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
      std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
      bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
156 
  /// Read the header of the specified bitcode buffer and extract just the
  /// triple information. If successful, this returns a string. On error, this
  /// returns "".
  /// NOTE(review): the return type is Expected<std::string>, so a failure may
  /// instead surface as an Error held by the result rather than as an empty
  /// string -- confirm against the definition and update this comment.
  Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
161 
  /// Return true if \p Buffer contains a bitcode file with ObjC code (category
  /// or class) in it. Failures are reported through the returned Expected.
  Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
165 
  /// Read the header of the specified bitcode buffer and extract just the
  /// producer string information. If successful, this returns a string. On
  /// error, this returns "".
  /// NOTE(review): the return type is Expected<std::string>, so a failure may
  /// instead surface as an Error held by the result rather than as an empty
  /// string -- confirm against the definition and update this comment.
  Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
170 
  /// Read the specified bitcode file, returning the module.
  ///
  /// The default DataLayoutCallback ignores its argument and returns None.
  /// Failures are reported through the returned Expected.
  Expected<std::unique_ptr<Module>> parseBitcodeFile(
      MemoryBufferRef Buffer, LLVMContext &Context,
      DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
        return None;
      });
177 
  /// Returns LTO information for the specified bitcode file. Failures are
  /// reported through the returned Expected.
  Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
180 
  /// Parse the specified bitcode buffer, returning the module summary index.
  /// Failures are reported through the returned Expected.
  Expected<std::unique_ptr<ModuleSummaryIndex>>
  getModuleSummaryIndex(MemoryBufferRef Buffer);
184 
  /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
  ///
  /// \param Buffer the bitcode buffer to parse.
  /// \param CombinedIndex the index that receives the merged summary.
  /// \param ModuleId NOTE(review): presumably the id under which this module
  /// is recorded in \p CombinedIndex -- confirm against the definition.
  /// \returns an Error describing the failure, if any.
  Error readModuleSummaryIndex(MemoryBufferRef Buffer,
                               ModuleSummaryIndex &CombinedIndex,
                               uint64_t ModuleId);
189 
  /// Parse the module summary index out of an IR file and return the module
  /// summary index object if found, or an empty summary if not. If Path refers
  /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
  /// this function will return nullptr.
  ///
  /// IgnoreEmptyThinLTOIndexFile defaults to false. Failures are reported
  /// through the returned Expected.
  Expected<std::unique_ptr<ModuleSummaryIndex>>
  getModuleSummaryIndexForFile(StringRef Path,
                               bool IgnoreEmptyThinLTOIndexFile = false);
197 
198   /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
199   /// for an LLVM IR bitcode wrapper.
200   inline bool isBitcodeWrapper(const unsigned char *BufPtr,
201                                const unsigned char *BufEnd) {
202     // See if you can find the hidden message in the magic bytes :-).
203     // (Hint: it's a little-endian encoding.)
204     return BufPtr != BufEnd &&
205            BufPtr[0] == 0xDE &&
206            BufPtr[1] == 0xC0 &&
207            BufPtr[2] == 0x17 &&
208            BufPtr[3] == 0x0B;
209   }
210 
211   /// isRawBitcode - Return true if the given bytes are the magic bytes for
212   /// raw LLVM IR bitcode (without a wrapper).
213   inline bool isRawBitcode(const unsigned char *BufPtr,
214                            const unsigned char *BufEnd) {
215     // These bytes sort of have a hidden message, but it's not in
216     // little-endian this time, and it's a little redundant.
217     return BufPtr != BufEnd &&
218            BufPtr[0] == 'B' &&
219            BufPtr[1] == 'C' &&
220            BufPtr[2] == 0xc0 &&
221            BufPtr[3] == 0xde;
222   }
223 
224   /// isBitcode - Return true if the given bytes are the magic bytes for
225   /// LLVM IR bitcode, either with or without a wrapper.
226   inline bool isBitcode(const unsigned char *BufPtr,
227                         const unsigned char *BufEnd) {
228     return isBitcodeWrapper(BufPtr, BufEnd) ||
229            isRawBitcode(BufPtr, BufEnd);
230   }
231 
232   /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
233   /// header for padding or other reasons.  The format of this header is:
234   ///
235   /// struct bc_header {
236   ///   uint32_t Magic;         // 0x0B17C0DE
237   ///   uint32_t Version;       // Version, currently always 0.
238   ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
239   ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
240   ///   ... potentially other gunk ...
241   /// };
242   ///
243   /// This function is called when we find a file with a matching magic number.
244   /// In this case, skip down to the subsection of the file that is actually a
245   /// BC file.
246   /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
247   /// contain the whole bitcode file.
248   inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
249                                        const unsigned char *&BufEnd,
250                                        bool VerifyBufferSize) {
251     // Must contain the offset and size field!
252     if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
253       return true;
254 
255     unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
256     unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
257     uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
258 
259     // Verify that Offset+Size fits in the file.
260     if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
261       return true;
262     BufPtr += Offset;
263     BufEnd = BufPtr+Size;
264     return false;
265   }
266 
  /// NOTE(review): only the declaration is visible in this header. Judging by
  /// the name and signature, this presumably assembles an APInt that is
  /// \p TypeBits bits wide from the 64-bit words in \p Vals -- confirm against
  /// the definition.
  APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
268 
269   const std::error_category &BitcodeErrorCategory();
270   enum class BitcodeError { CorruptedBitcode = 1 };
271   inline std::error_code make_error_code(BitcodeError E) {
272     return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
273   }
274 
275 } // end namespace llvm
276 
namespace std {

// Marks llvm::BitcodeError as an error code enumeration, which lets
// std::error_code be constructed implicitly from a BitcodeError value (via
// llvm::make_error_code).
template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};

} // end namespace std
282 
283 #endif // LLVM_BITCODE_BITCODEREADER_H
284