1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines interfaces to read LLVM bitcode files/streams.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_BITCODE_BITCODEREADER_H
14 #define LLVM_BITCODE_BITCODEREADER_H
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Bitstream/BitCodes.h"
19 #include "llvm/IR/ModuleSummaryIndex.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/Error.h"
22 #include "llvm/Support/ErrorOr.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include <cstdint>
25 #include <memory>
26 #include <string>
27 #include <system_error>
28 #include <vector>
29 namespace llvm {
30 
31 class LLVMContext;
32 class Module;
33 
34   // These functions are for converting Expected/Error values to
35   // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
36   // Remove these functions once no longer needed by the C and libLTO APIs.
37 
  /// Converts \p Err into a std::error_code for legacy clients that cannot
  /// consume llvm::Error directly.
  /// NOTE(review): the name suggests the underlying errors are also reported
  /// through \p Ctx; the definition is not in this header -- confirm there.
  std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
39 
40   template <typename T>
41   ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
42     if (!Val)
43       return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
44     return std::move(*Val);
45   }
46 
47   struct BitcodeFileContents;
48 
  /// Basic information extracted from a bitcode module to be used for LTO.
  /// This is the result type of BitcodeModule::getLTOInfo() and
  /// getBitcodeLTOInfo(). The members have no default initializers.
  struct BitcodeLTOInfo {
    /// Whether the module is to be compiled with ThinLTO.
    bool IsThinLTO;
    /// Whether the module has a summary.
    bool HasSummary;
    /// NOTE(review): presumably whether the module was produced with split LTO
    /// units enabled -- confirm against the reader implementation.
    bool EnableSplitLTOUnit;
  };
55 
56   /// Represents a module in a bitcode file.
57   class BitcodeModule {
58     // This covers the identification (if present) and module blocks.
59     ArrayRef<uint8_t> Buffer;
60     StringRef ModuleIdentifier;
61 
62     // The string table used to interpret this module.
63     StringRef Strtab;
64 
65     // The bitstream location of the IDENTIFICATION_BLOCK.
66     uint64_t IdentificationBit;
67 
68     // The bitstream location of this module's MODULE_BLOCK.
69     uint64_t ModuleBit;
70 
71     BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
72                   uint64_t IdentificationBit, uint64_t ModuleBit)
73         : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
74           IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
75 
76     // Calls the ctor.
77     friend Expected<BitcodeFileContents>
78     getBitcodeFileContents(MemoryBufferRef Buffer);
79 
80     Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
81                                                     bool MaterializeAll,
82                                                     bool ShouldLazyLoadMetadata,
83                                                     bool IsImporting);
84 
85   public:
86     StringRef getBuffer() const {
87       return StringRef((const char *)Buffer.begin(), Buffer.size());
88     }
89 
90     StringRef getStrtab() const { return Strtab; }
91 
92     StringRef getModuleIdentifier() const { return ModuleIdentifier; }
93 
94     /// Read the bitcode module and prepare for lazy deserialization of function
95     /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
96     /// If IsImporting is true, this module is being parsed for ThinLTO
97     /// importing into another module.
98     Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
99                                                     bool ShouldLazyLoadMetadata,
100                                                     bool IsImporting);
101 
102     /// Read the entire bitcode module and return it.
103     Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
104 
105     /// Returns information about the module to be used for LTO: whether to
106     /// compile with ThinLTO, and whether it has a summary.
107     Expected<BitcodeLTOInfo> getLTOInfo();
108 
109     /// Parse the specified bitcode buffer, returning the module summary index.
110     Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
111 
112     /// Parse the specified bitcode buffer and merge its module summary index
113     /// into CombinedIndex.
114     Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
115                       uint64_t ModuleId);
116   };
117 
118   struct BitcodeFileContents {
119     std::vector<BitcodeModule> Mods;
120     StringRef Symtab, StrtabForSymtab;
121   };
122 
  /// Returns the contents of a bitcode file. This includes the raw contents of
  /// the symbol table embedded in the bitcode file. Clients which require a
  /// symbol table should prefer to use irsymtab::read instead of this function
  /// because it creates a reader for the irsymtab and handles upgrading bitcode
  /// files without a symbol table or with an old symbol table.
  Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);

  /// Returns a list of modules in the specified bitcode buffer.
  Expected<std::vector<BitcodeModule>>
  getBitcodeModuleList(MemoryBufferRef Buffer);

  /// Read the header of the specified bitcode buffer and prepare for lazy
  /// deserialization of function bodies. If \p ShouldLazyLoadMetadata is true,
  /// lazily load metadata as well. If \p IsImporting is true, this module is
  /// being parsed for ThinLTO importing into another module. Both flags
  /// default to false.
  Expected<std::unique_ptr<Module>>
  getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
                       bool ShouldLazyLoadMetadata = false,
                       bool IsImporting = false);

  /// Like getLazyBitcodeModule, except that the module takes ownership of
  /// the memory buffer if successful. If successful, this moves Buffer. On
  /// error, this *does not* move Buffer. If \p IsImporting is true, this
  /// module is being parsed for ThinLTO importing into another module.
  Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
      std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
      bool ShouldLazyLoadMetadata = false, bool IsImporting = false);

  /// Read the header of the specified bitcode buffer and extract just the
  /// triple information. If successful, this returns a string. On error, this
  /// returns "".
  /// NOTE(review): the return type is Expected<std::string>, so a failure may
  /// instead be reported through the Expected rather than as "" -- confirm
  /// against the definition and update this comment accordingly.
  Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);

  /// Return true if \p Buffer contains a bitcode file with ObjC code (category
  /// or class) in it.
  Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);

  /// Read the header of the specified bitcode buffer and extract just the
  /// producer string information. If successful, this returns a string. On
  /// error, this returns "".
  /// NOTE(review): the return type is Expected<std::string>, so a failure may
  /// instead be reported through the Expected rather than as "" -- confirm
  /// against the definition and update this comment accordingly.
  Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);

  /// Read the specified bitcode file, returning the module.
  Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
                                                     LLVMContext &Context);

  /// Returns LTO information for the specified bitcode file.
  Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);

  /// Parse the specified bitcode buffer, returning the module summary index.
  Expected<std::unique_ptr<ModuleSummaryIndex>>
  getModuleSummaryIndex(MemoryBufferRef Buffer);

  /// Parse the specified bitcode buffer and merge the index into
  /// \p CombinedIndex.
  /// NOTE(review): \p ModuleId is presumably the id under which the module is
  /// recorded in \p CombinedIndex -- confirm against the definition.
  Error readModuleSummaryIndex(MemoryBufferRef Buffer,
                               ModuleSummaryIndex &CombinedIndex,
                               uint64_t ModuleId);

  /// Parse the module summary index out of an IR file and return the module
  /// summary index object if found, or an empty summary if not. If Path refers
  /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
  /// this function will return nullptr.
  Expected<std::unique_ptr<ModuleSummaryIndex>>
  getModuleSummaryIndexForFile(StringRef Path,
                               bool IgnoreEmptyThinLTOIndexFile = false);
188 
189   /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
190   /// for an LLVM IR bitcode wrapper.
191   inline bool isBitcodeWrapper(const unsigned char *BufPtr,
192                                const unsigned char *BufEnd) {
193     // See if you can find the hidden message in the magic bytes :-).
194     // (Hint: it's a little-endian encoding.)
195     return BufPtr != BufEnd &&
196            BufPtr[0] == 0xDE &&
197            BufPtr[1] == 0xC0 &&
198            BufPtr[2] == 0x17 &&
199            BufPtr[3] == 0x0B;
200   }
201 
202   /// isRawBitcode - Return true if the given bytes are the magic bytes for
203   /// raw LLVM IR bitcode (without a wrapper).
204   inline bool isRawBitcode(const unsigned char *BufPtr,
205                            const unsigned char *BufEnd) {
206     // These bytes sort of have a hidden message, but it's not in
207     // little-endian this time, and it's a little redundant.
208     return BufPtr != BufEnd &&
209            BufPtr[0] == 'B' &&
210            BufPtr[1] == 'C' &&
211            BufPtr[2] == 0xc0 &&
212            BufPtr[3] == 0xde;
213   }
214 
215   /// isBitcode - Return true if the given bytes are the magic bytes for
216   /// LLVM IR bitcode, either with or without a wrapper.
217   inline bool isBitcode(const unsigned char *BufPtr,
218                         const unsigned char *BufEnd) {
219     return isBitcodeWrapper(BufPtr, BufEnd) ||
220            isRawBitcode(BufPtr, BufEnd);
221   }
222 
223   /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
224   /// header for padding or other reasons.  The format of this header is:
225   ///
226   /// struct bc_header {
227   ///   uint32_t Magic;         // 0x0B17C0DE
228   ///   uint32_t Version;       // Version, currently always 0.
229   ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
230   ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
231   ///   ... potentially other gunk ...
232   /// };
233   ///
234   /// This function is called when we find a file with a matching magic number.
235   /// In this case, skip down to the subsection of the file that is actually a
236   /// BC file.
237   /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
238   /// contain the whole bitcode file.
239   inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
240                                        const unsigned char *&BufEnd,
241                                        bool VerifyBufferSize) {
242     // Must contain the offset and size field!
243     if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
244       return true;
245 
246     unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
247     unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
248     uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
249 
250     // Verify that Offset+Size fits in the file.
251     if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
252       return true;
253     BufPtr += Offset;
254     BufEnd = BufPtr+Size;
255     return false;
256   }
257 
258   const std::error_category &BitcodeErrorCategory();
259   enum class BitcodeError { CorruptedBitcode = 1 };
260   inline std::error_code make_error_code(BitcodeError E) {
261     return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
262   }
263 
264 } // end namespace llvm
265 
namespace std {

// Registers llvm::BitcodeError as an error-code enumeration, which lets a
// std::error_code be constructed from a BitcodeError value implicitly (the
// standard library then looks up make_error_code for it).
template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};

} // end namespace std
271 
272 #endif // LLVM_BITCODE_BITCODEREADER_H
273