1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header defines interfaces to read LLVM bitcode files/streams. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_BITCODE_BITCODEREADER_H 14 #define LLVM_BITCODE_BITCODEREADER_H 15 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Bitstream/BitCodeEnums.h" 19 #include "llvm/IR/GlobalValue.h" 20 #include "llvm/Support/Endian.h" 21 #include "llvm/Support/Error.h" 22 #include "llvm/Support/ErrorOr.h" 23 #include "llvm/Support/MemoryBufferRef.h" 24 #include <cstdint> 25 #include <memory> 26 #include <optional> 27 #include <string> 28 #include <system_error> 29 #include <vector> 30 namespace llvm { 31 32 class LLVMContext; 33 class Module; 34 class MemoryBuffer; 35 class Metadata; 36 class ModuleSummaryIndex; 37 class Type; 38 class Value; 39 40 // Callback to override the data layout string of an imported bitcode module. 41 // The first argument is the target triple, the second argument the data layout 42 // string from the input, or a default string. It will be used if the callback 43 // returns std::nullopt. 44 typedef std::function<std::optional<std::string>(StringRef, StringRef)> 45 DataLayoutCallbackFuncTy; 46 47 typedef std::function<Type *(unsigned)> GetTypeByIDTy; 48 49 typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy; 50 51 typedef std::function<void(Value *, unsigned, GetTypeByIDTy, 52 GetContainedTypeIDTy)> 53 ValueTypeCallbackTy; 54 55 typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy, 56 GetContainedTypeIDTy)> 57 MDTypeCallbackTy; 58 59 // These functions are for converting Expected/Error values to 60 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: 61 // Remove these functions once no longer needed by the C and libLTO APIs. 62 63 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); 64 65 template <typename T> 66 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { 67 if (!Val) 68 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); 69 return std::move(*Val); 70 } 71 72 struct ParserCallbacks { 73 std::optional<DataLayoutCallbackFuncTy> DataLayout; 74 /// The ValueType callback is called for every function definition or 75 /// declaration and allows accessing the type information, also behind 76 /// pointers. This can be useful, when the opaque pointer upgrade cleans all 77 /// type information behind pointers. 78 /// The second argument to ValueTypeCallback is the type ID of the 79 /// function, the two passed functions can be used to extract type 80 /// information. 81 std::optional<ValueTypeCallbackTy> ValueType; 82 /// The MDType callback is called for every value in metadata. 83 std::optional<MDTypeCallbackTy> MDType; 84 85 ParserCallbacks() = default; 86 explicit ParserCallbacks(DataLayoutCallbackFuncTy DataLayout) 87 : DataLayout(DataLayout) {} 88 }; 89 90 struct BitcodeFileContents; 91 92 /// Basic information extracted from a bitcode module to be used for LTO. 93 struct BitcodeLTOInfo { 94 bool IsThinLTO; 95 bool HasSummary; 96 bool EnableSplitLTOUnit; 97 }; 98 99 /// Represents a module in a bitcode file. 100 class BitcodeModule { 101 // This covers the identification (if present) and module blocks. 102 ArrayRef<uint8_t> Buffer; 103 StringRef ModuleIdentifier; 104 105 // The string table used to interpret this module. 106 StringRef Strtab; 107 108 // The bitstream location of the IDENTIFICATION_BLOCK. 109 uint64_t IdentificationBit; 110 111 // The bitstream location of this module's MODULE_BLOCK. 112 uint64_t ModuleBit; 113 114 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, 115 uint64_t IdentificationBit, uint64_t ModuleBit) 116 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), 117 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} 118 119 // Calls the ctor. 120 friend Expected<BitcodeFileContents> 121 getBitcodeFileContents(MemoryBufferRef Buffer); 122 123 Expected<std::unique_ptr<Module>> 124 getModuleImpl(LLVMContext &Context, bool MaterializeAll, 125 bool ShouldLazyLoadMetadata, bool IsImporting, 126 ParserCallbacks Callbacks = {}); 127 128 public: 129 StringRef getBuffer() const { 130 return StringRef((const char *)Buffer.begin(), Buffer.size()); 131 } 132 133 StringRef getStrtab() const { return Strtab; } 134 135 StringRef getModuleIdentifier() const { return ModuleIdentifier; } 136 137 /// Read the bitcode module and prepare for lazy deserialization of function 138 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. 139 /// If IsImporting is true, this module is being parsed for ThinLTO 140 /// importing into another module. 141 Expected<std::unique_ptr<Module>> 142 getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, 143 bool IsImporting, ParserCallbacks Callbacks = {}); 144 145 /// Read the entire bitcode module and return it. 146 Expected<std::unique_ptr<Module>> 147 parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {}); 148 149 /// Returns information about the module to be used for LTO: whether to 150 /// compile with ThinLTO, and whether it has a summary. 151 Expected<BitcodeLTOInfo> getLTOInfo(); 152 153 /// Parse the specified bitcode buffer, returning the module summary index. 154 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); 155 156 /// Parse the specified bitcode buffer and merge its module summary index 157 /// into CombinedIndex. 158 Error 159 readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, 160 uint64_t ModuleId, 161 std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr); 162 }; 163 164 struct BitcodeFileContents { 165 std::vector<BitcodeModule> Mods; 166 StringRef Symtab, StrtabForSymtab; 167 }; 168 169 /// Returns the contents of a bitcode file. This includes the raw contents of 170 /// the symbol table embedded in the bitcode file. Clients which require a 171 /// symbol table should prefer to use irsymtab::read instead of this function 172 /// because it creates a reader for the irsymtab and handles upgrading bitcode 173 /// files without a symbol table or with an old symbol table. 174 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); 175 176 /// Returns a list of modules in the specified bitcode buffer. 177 Expected<std::vector<BitcodeModule>> 178 getBitcodeModuleList(MemoryBufferRef Buffer); 179 180 /// Read the header of the specified bitcode buffer and prepare for lazy 181 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, 182 /// lazily load metadata as well. If IsImporting is true, this module is 183 /// being parsed for ThinLTO importing into another module. 184 Expected<std::unique_ptr<Module>> 185 getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, 186 bool ShouldLazyLoadMetadata = false, 187 bool IsImporting = false, 188 ParserCallbacks Callbacks = {}); 189 190 /// Like getLazyBitcodeModule, except that the module takes ownership of 191 /// the memory buffer if successful. If successful, this moves Buffer. On 192 /// error, this *does not* move Buffer. If IsImporting is true, this module is 193 /// being parsed for ThinLTO importing into another module. 194 Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( 195 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, 196 bool ShouldLazyLoadMetadata = false, bool IsImporting = false, 197 ParserCallbacks Callbacks = {}); 198 199 /// Read the header of the specified bitcode buffer and extract just the 200 /// triple information. If successful, this returns a string. On error, this 201 /// returns "". 202 Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); 203 204 /// Return true if \p Buffer contains a bitcode file with ObjC code (category 205 /// or class) in it. 206 Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); 207 208 /// Read the header of the specified bitcode buffer and extract just the 209 /// producer string information. If successful, this returns a string. On 210 /// error, this returns "". 211 Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); 212 213 /// Read the specified bitcode file, returning the module. 214 Expected<std::unique_ptr<Module>> 215 parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, 216 ParserCallbacks Callbacks = {}); 217 218 /// Returns LTO information for the specified bitcode file. 219 Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer); 220 221 /// Parse the specified bitcode buffer, returning the module summary index. 222 Expected<std::unique_ptr<ModuleSummaryIndex>> 223 getModuleSummaryIndex(MemoryBufferRef Buffer); 224 225 /// Parse the specified bitcode buffer and merge the index into CombinedIndex. 226 Error readModuleSummaryIndex(MemoryBufferRef Buffer, 227 ModuleSummaryIndex &CombinedIndex, 228 uint64_t ModuleId); 229 230 /// Parse the module summary index out of an IR file and return the module 231 /// summary index object if found, or an empty summary if not. If Path refers 232 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then 233 /// this function will return nullptr. 234 Expected<std::unique_ptr<ModuleSummaryIndex>> 235 getModuleSummaryIndexForFile(StringRef Path, 236 bool IgnoreEmptyThinLTOIndexFile = false); 237 238 /// isBitcodeWrapper - Return true if the given bytes are the magic bytes 239 /// for an LLVM IR bitcode wrapper. 240 inline bool isBitcodeWrapper(const unsigned char *BufPtr, 241 const unsigned char *BufEnd) { 242 // See if you can find the hidden message in the magic bytes :-). 243 // (Hint: it's a little-endian encoding.) 244 return BufPtr != BufEnd && 245 BufPtr[0] == 0xDE && 246 BufPtr[1] == 0xC0 && 247 BufPtr[2] == 0x17 && 248 BufPtr[3] == 0x0B; 249 } 250 251 /// isRawBitcode - Return true if the given bytes are the magic bytes for 252 /// raw LLVM IR bitcode (without a wrapper). 253 inline bool isRawBitcode(const unsigned char *BufPtr, 254 const unsigned char *BufEnd) { 255 // These bytes sort of have a hidden message, but it's not in 256 // little-endian this time, and it's a little redundant. 257 return BufPtr != BufEnd && 258 BufPtr[0] == 'B' && 259 BufPtr[1] == 'C' && 260 BufPtr[2] == 0xc0 && 261 BufPtr[3] == 0xde; 262 } 263 264 /// isBitcode - Return true if the given bytes are the magic bytes for 265 /// LLVM IR bitcode, either with or without a wrapper. 266 inline bool isBitcode(const unsigned char *BufPtr, 267 const unsigned char *BufEnd) { 268 return isBitcodeWrapper(BufPtr, BufEnd) || 269 isRawBitcode(BufPtr, BufEnd); 270 } 271 272 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special 273 /// header for padding or other reasons. The format of this header is: 274 /// 275 /// struct bc_header { 276 /// uint32_t Magic; // 0x0B17C0DE 277 /// uint32_t Version; // Version, currently always 0. 278 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. 279 /// uint32_t BitcodeSize; // Size of traditional bitcode file. 280 /// ... potentially other gunk ... 281 /// }; 282 /// 283 /// This function is called when we find a file with a matching magic number. 284 /// In this case, skip down to the subsection of the file that is actually a 285 /// BC file. 286 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to 287 /// contain the whole bitcode file. 288 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, 289 const unsigned char *&BufEnd, 290 bool VerifyBufferSize) { 291 // Must contain the offset and size field! 292 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) 293 return true; 294 295 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 296 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 297 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; 298 299 // Verify that Offset+Size fits in the file. 300 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) 301 return true; 302 BufPtr += Offset; 303 BufEnd = BufPtr+Size; 304 return false; 305 } 306 307 APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits); 308 309 const std::error_category &BitcodeErrorCategory(); 310 enum class BitcodeError { CorruptedBitcode = 1 }; 311 inline std::error_code make_error_code(BitcodeError E) { 312 return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); 313 } 314 315 } // end namespace llvm 316 317 namespace std { 318 319 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; 320 321 } // end namespace std 322 323 #endif // LLVM_BITCODE_BITCODEREADER_H 324