//===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header defines interfaces to read LLVM bitcode files/streams.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_BITCODE_BITCODEREADER_H
#define LLVM_BITCODE_BITCODEREADER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitstream/BitCodeEnums.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBufferRef.h"
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <system_error>
#include <type_traits>
#include <utility>
#include <vector>

namespace llvm {

class LLVMContext;
class Module;
class MemoryBuffer;
class Metadata;
class ModuleSummaryIndex;
class Type;
class Value;

// Callback to override the data layout string of an imported bitcode module.
// The first argument is the target triple, the second argument the data layout
// string from the input, or a default string. It will be used if the callback
// returns std::nullopt.
44 typedef std::function<std::optional<std::string>(StringRef, StringRef)> 45 DataLayoutCallbackFuncTy; 46 47 typedef std::function<Type *(unsigned)> GetTypeByIDTy; 48 49 typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy; 50 51 typedef std::function<void(Value *, unsigned, GetTypeByIDTy, 52 GetContainedTypeIDTy)> 53 ValueTypeCallbackTy; 54 55 typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy, 56 GetContainedTypeIDTy)> 57 MDTypeCallbackTy; 58 59 // These functions are for converting Expected/Error values to 60 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: 61 // Remove these functions once no longer needed by the C and libLTO APIs. 62 63 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); 64 65 template <typename T> 66 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { 67 if (!Val) 68 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); 69 return std::move(*Val); 70 } 71 72 struct ParserCallbacks { 73 std::optional<DataLayoutCallbackFuncTy> DataLayout; 74 /// The ValueType callback is called for every function definition or 75 /// declaration and allows accessing the type information, also behind 76 /// pointers. This can be useful, when the opaque pointer upgrade cleans all 77 /// type information behind pointers. 78 /// The second argument to ValueTypeCallback is the type ID of the 79 /// function, the two passed functions can be used to extract type 80 /// information. 81 std::optional<ValueTypeCallbackTy> ValueType; 82 /// The MDType callback is called for every value in metadata. 83 std::optional<MDTypeCallbackTy> MDType; 84 85 ParserCallbacks() = default; 86 explicit ParserCallbacks(DataLayoutCallbackFuncTy DataLayout) 87 : DataLayout(DataLayout) {} 88 }; 89 90 struct BitcodeFileContents; 91 92 /// Basic information extracted from a bitcode module to be used for LTO. 
93 struct BitcodeLTOInfo { 94 bool IsThinLTO; 95 bool HasSummary; 96 bool EnableSplitLTOUnit; 97 bool UnifiedLTO; 98 }; 99 100 /// Represents a module in a bitcode file. 101 class BitcodeModule { 102 // This covers the identification (if present) and module blocks. 103 ArrayRef<uint8_t> Buffer; 104 StringRef ModuleIdentifier; 105 106 // The string table used to interpret this module. 107 StringRef Strtab; 108 109 // The bitstream location of the IDENTIFICATION_BLOCK. 110 uint64_t IdentificationBit; 111 112 // The bitstream location of this module's MODULE_BLOCK. 113 uint64_t ModuleBit; 114 115 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, 116 uint64_t IdentificationBit, uint64_t ModuleBit) 117 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), 118 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} 119 120 // Calls the ctor. 121 friend Expected<BitcodeFileContents> 122 getBitcodeFileContents(MemoryBufferRef Buffer); 123 124 Expected<std::unique_ptr<Module>> 125 getModuleImpl(LLVMContext &Context, bool MaterializeAll, 126 bool ShouldLazyLoadMetadata, bool IsImporting, 127 ParserCallbacks Callbacks = {}); 128 129 public: 130 StringRef getBuffer() const { 131 return StringRef((const char *)Buffer.begin(), Buffer.size()); 132 } 133 134 StringRef getStrtab() const { return Strtab; } 135 136 StringRef getModuleIdentifier() const { return ModuleIdentifier; } 137 138 /// Read the bitcode module and prepare for lazy deserialization of function 139 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. 140 /// If IsImporting is true, this module is being parsed for ThinLTO 141 /// importing into another module. 142 Expected<std::unique_ptr<Module>> 143 getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, 144 bool IsImporting, ParserCallbacks Callbacks = {}); 145 146 /// Read the entire bitcode module and return it. 
147 Expected<std::unique_ptr<Module>> 148 parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {}); 149 150 /// Returns information about the module to be used for LTO: whether to 151 /// compile with ThinLTO, and whether it has a summary. 152 Expected<BitcodeLTOInfo> getLTOInfo(); 153 154 /// Parse the specified bitcode buffer, returning the module summary index. 155 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); 156 157 /// Parse the specified bitcode buffer and merge its module summary index 158 /// into CombinedIndex. 159 Error 160 readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, 161 uint64_t ModuleId, 162 std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr); 163 }; 164 165 struct BitcodeFileContents { 166 std::vector<BitcodeModule> Mods; 167 StringRef Symtab, StrtabForSymtab; 168 }; 169 170 /// Returns the contents of a bitcode file. This includes the raw contents of 171 /// the symbol table embedded in the bitcode file. Clients which require a 172 /// symbol table should prefer to use irsymtab::read instead of this function 173 /// because it creates a reader for the irsymtab and handles upgrading bitcode 174 /// files without a symbol table or with an old symbol table. 175 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); 176 177 /// Returns a list of modules in the specified bitcode buffer. 178 Expected<std::vector<BitcodeModule>> 179 getBitcodeModuleList(MemoryBufferRef Buffer); 180 181 /// Read the header of the specified bitcode buffer and prepare for lazy 182 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, 183 /// lazily load metadata as well. If IsImporting is true, this module is 184 /// being parsed for ThinLTO importing into another module. 
Expected<std::unique_ptr<Module>>
getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
                     bool ShouldLazyLoadMetadata = false,
                     bool IsImporting = false,
                     ParserCallbacks Callbacks = {});

/// Like getLazyBitcodeModule, except that the module takes ownership of
/// the memory buffer if successful. If successful, this moves Buffer. On
/// error, this *does not* move Buffer. If IsImporting is true, this module is
/// being parsed for ThinLTO importing into another module.
Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
    std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
    bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
    ParserCallbacks Callbacks = {});

/// Read the header of the specified bitcode buffer and extract just the
/// triple information. If successful, this returns a string. On error, this
/// returns "".
/// NOTE(review): the return type is Expected<std::string>, which can also
/// carry an Error; the 'returns ""' wording may be stale -- confirm against
/// the implementation.
Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);

/// Return true if \p Buffer contains a bitcode file with ObjC code (category
/// or class) in it.
Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);

/// Read the header of the specified bitcode buffer and extract just the
/// producer string information. If successful, this returns a string. On
/// error, this returns "".
/// NOTE(review): the return type is Expected<std::string>, which can also
/// carry an Error; the 'returns ""' wording may be stale -- confirm against
/// the implementation.
Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);

/// Read the specified bitcode file, returning the module.
Expected<std::unique_ptr<Module>>
parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
                 ParserCallbacks Callbacks = {});

/// Returns LTO information for the specified bitcode file.
Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);

/// Parse the specified bitcode buffer, returning the module summary index.
223 Expected<std::unique_ptr<ModuleSummaryIndex>> 224 getModuleSummaryIndex(MemoryBufferRef Buffer); 225 226 /// Parse the specified bitcode buffer and merge the index into CombinedIndex. 227 Error readModuleSummaryIndex(MemoryBufferRef Buffer, 228 ModuleSummaryIndex &CombinedIndex, 229 uint64_t ModuleId); 230 231 /// Parse the module summary index out of an IR file and return the module 232 /// summary index object if found, or an empty summary if not. If Path refers 233 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then 234 /// this function will return nullptr. 235 Expected<std::unique_ptr<ModuleSummaryIndex>> 236 getModuleSummaryIndexForFile(StringRef Path, 237 bool IgnoreEmptyThinLTOIndexFile = false); 238 239 /// isBitcodeWrapper - Return true if the given bytes are the magic bytes 240 /// for an LLVM IR bitcode wrapper. 241 inline bool isBitcodeWrapper(const unsigned char *BufPtr, 242 const unsigned char *BufEnd) { 243 // See if you can find the hidden message in the magic bytes :-). 244 // (Hint: it's a little-endian encoding.) 245 return BufPtr != BufEnd && 246 BufPtr[0] == 0xDE && 247 BufPtr[1] == 0xC0 && 248 BufPtr[2] == 0x17 && 249 BufPtr[3] == 0x0B; 250 } 251 252 /// isRawBitcode - Return true if the given bytes are the magic bytes for 253 /// raw LLVM IR bitcode (without a wrapper). 254 inline bool isRawBitcode(const unsigned char *BufPtr, 255 const unsigned char *BufEnd) { 256 // These bytes sort of have a hidden message, but it's not in 257 // little-endian this time, and it's a little redundant. 258 return BufPtr != BufEnd && 259 BufPtr[0] == 'B' && 260 BufPtr[1] == 'C' && 261 BufPtr[2] == 0xc0 && 262 BufPtr[3] == 0xde; 263 } 264 265 /// isBitcode - Return true if the given bytes are the magic bytes for 266 /// LLVM IR bitcode, either with or without a wrapper. 
267 inline bool isBitcode(const unsigned char *BufPtr, 268 const unsigned char *BufEnd) { 269 return isBitcodeWrapper(BufPtr, BufEnd) || 270 isRawBitcode(BufPtr, BufEnd); 271 } 272 273 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special 274 /// header for padding or other reasons. The format of this header is: 275 /// 276 /// struct bc_header { 277 /// uint32_t Magic; // 0x0B17C0DE 278 /// uint32_t Version; // Version, currently always 0. 279 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. 280 /// uint32_t BitcodeSize; // Size of traditional bitcode file. 281 /// ... potentially other gunk ... 282 /// }; 283 /// 284 /// This function is called when we find a file with a matching magic number. 285 /// In this case, skip down to the subsection of the file that is actually a 286 /// BC file. 287 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to 288 /// contain the whole bitcode file. 289 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, 290 const unsigned char *&BufEnd, 291 bool VerifyBufferSize) { 292 // Must contain the offset and size field! 293 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) 294 return true; 295 296 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 297 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 298 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; 299 300 // Verify that Offset+Size fits in the file. 
301 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) 302 return true; 303 BufPtr += Offset; 304 BufEnd = BufPtr+Size; 305 return false; 306 } 307 308 APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits); 309 310 const std::error_category &BitcodeErrorCategory(); 311 enum class BitcodeError { CorruptedBitcode = 1 }; 312 inline std::error_code make_error_code(BitcodeError E) { 313 return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); 314 } 315 316 } // end namespace llvm 317 318 namespace std { 319 320 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; 321 322 } // end namespace std 323 324 #endif // LLVM_BITCODE_BITCODEREADER_H 325