1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header defines interfaces to read LLVM bitcode files/streams. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_BITCODE_BITCODEREADER_H 14 #define LLVM_BITCODE_BITCODEREADER_H 15 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Bitstream/BitCodeEnums.h" 19 #include "llvm/Support/Endian.h" 20 #include "llvm/Support/Error.h" 21 #include "llvm/Support/ErrorOr.h" 22 #include "llvm/Support/MemoryBufferRef.h" 23 #include <cstdint> 24 #include <memory> 25 #include <string> 26 #include <system_error> 27 #include <vector> 28 namespace llvm { 29 30 class LLVMContext; 31 class Module; 32 class MemoryBuffer; 33 class ModuleSummaryIndex; 34 35 typedef llvm::function_ref<Optional<std::string>(StringRef)> 36 DataLayoutCallbackTy; 37 38 // These functions are for converting Expected/Error values to 39 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: 40 // Remove these functions once no longer needed by the C and libLTO APIs. 41 42 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); 43 44 template <typename T> 45 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { 46 if (!Val) 47 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); 48 return std::move(*Val); 49 } 50 51 struct BitcodeFileContents; 52 53 /// Basic information extracted from a bitcode module to be used for LTO. 54 struct BitcodeLTOInfo { 55 bool IsThinLTO; 56 bool HasSummary; 57 bool EnableSplitLTOUnit; 58 }; 59 60 /// Represents a module in a bitcode file. 61 class BitcodeModule { 62 // This covers the identification (if present) and module blocks. 63 ArrayRef<uint8_t> Buffer; 64 StringRef ModuleIdentifier; 65 66 // The string table used to interpret this module. 67 StringRef Strtab; 68 69 // The bitstream location of the IDENTIFICATION_BLOCK. 70 uint64_t IdentificationBit; 71 72 // The bitstream location of this module's MODULE_BLOCK. 73 uint64_t ModuleBit; 74 75 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, 76 uint64_t IdentificationBit, uint64_t ModuleBit) 77 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), 78 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} 79 80 // Calls the ctor. 81 friend Expected<BitcodeFileContents> 82 getBitcodeFileContents(MemoryBufferRef Buffer); 83 84 Expected<std::unique_ptr<Module>> 85 getModuleImpl(LLVMContext &Context, bool MaterializeAll, 86 bool ShouldLazyLoadMetadata, bool IsImporting, 87 DataLayoutCallbackTy DataLayoutCallback); 88 89 public: 90 StringRef getBuffer() const { 91 return StringRef((const char *)Buffer.begin(), Buffer.size()); 92 } 93 94 StringRef getStrtab() const { return Strtab; } 95 96 StringRef getModuleIdentifier() const { return ModuleIdentifier; } 97 98 /// Read the bitcode module and prepare for lazy deserialization of function 99 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. 100 /// If IsImporting is true, this module is being parsed for ThinLTO 101 /// importing into another module. 102 Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context, 103 bool ShouldLazyLoadMetadata, 104 bool IsImporting); 105 106 /// Read the entire bitcode module and return it. 107 Expected<std::unique_ptr<Module>> parseModule( 108 LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback = 109 [](StringRef) { return None; }); 110 111 /// Returns information about the module to be used for LTO: whether to 112 /// compile with ThinLTO, and whether it has a summary. 113 Expected<BitcodeLTOInfo> getLTOInfo(); 114 115 /// Parse the specified bitcode buffer, returning the module summary index. 116 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); 117 118 /// Parse the specified bitcode buffer and merge its module summary index 119 /// into CombinedIndex. 120 Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, 121 uint64_t ModuleId); 122 }; 123 124 struct BitcodeFileContents { 125 std::vector<BitcodeModule> Mods; 126 StringRef Symtab, StrtabForSymtab; 127 }; 128 129 /// Returns the contents of a bitcode file. This includes the raw contents of 130 /// the symbol table embedded in the bitcode file. Clients which require a 131 /// symbol table should prefer to use irsymtab::read instead of this function 132 /// because it creates a reader for the irsymtab and handles upgrading bitcode 133 /// files without a symbol table or with an old symbol table. 134 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); 135 136 /// Returns a list of modules in the specified bitcode buffer. 137 Expected<std::vector<BitcodeModule>> 138 getBitcodeModuleList(MemoryBufferRef Buffer); 139 140 /// Read the header of the specified bitcode buffer and prepare for lazy 141 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, 142 /// lazily load metadata as well. If IsImporting is true, this module is 143 /// being parsed for ThinLTO importing into another module. 144 Expected<std::unique_ptr<Module>> 145 getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, 146 bool ShouldLazyLoadMetadata = false, 147 bool IsImporting = false); 148 149 /// Like getLazyBitcodeModule, except that the module takes ownership of 150 /// the memory buffer if successful. If successful, this moves Buffer. On 151 /// error, this *does not* move Buffer. If IsImporting is true, this module is 152 /// being parsed for ThinLTO importing into another module. 153 Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( 154 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, 155 bool ShouldLazyLoadMetadata = false, bool IsImporting = false); 156 157 /// Read the header of the specified bitcode buffer and extract just the 158 /// triple information. If successful, this returns a string. On error, this 159 /// returns "". 160 Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); 161 162 /// Return true if \p Buffer contains a bitcode file with ObjC code (category 163 /// or class) in it. 164 Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); 165 166 /// Read the header of the specified bitcode buffer and extract just the 167 /// producer string information. If successful, this returns a string. On 168 /// error, this returns "". 169 Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); 170 171 /// Read the specified bitcode file, returning the module. 172 Expected<std::unique_ptr<Module>> parseBitcodeFile( 173 MemoryBufferRef Buffer, LLVMContext &Context, 174 DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { 175 return None; 176 }); 177 178 /// Returns LTO information for the specified bitcode file. 179 Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer); 180 181 /// Parse the specified bitcode buffer, returning the module summary index. 182 Expected<std::unique_ptr<ModuleSummaryIndex>> 183 getModuleSummaryIndex(MemoryBufferRef Buffer); 184 185 /// Parse the specified bitcode buffer and merge the index into CombinedIndex. 186 Error readModuleSummaryIndex(MemoryBufferRef Buffer, 187 ModuleSummaryIndex &CombinedIndex, 188 uint64_t ModuleId); 189 190 /// Parse the module summary index out of an IR file and return the module 191 /// summary index object if found, or an empty summary if not. If Path refers 192 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then 193 /// this function will return nullptr. 194 Expected<std::unique_ptr<ModuleSummaryIndex>> 195 getModuleSummaryIndexForFile(StringRef Path, 196 bool IgnoreEmptyThinLTOIndexFile = false); 197 198 /// isBitcodeWrapper - Return true if the given bytes are the magic bytes 199 /// for an LLVM IR bitcode wrapper. 200 inline bool isBitcodeWrapper(const unsigned char *BufPtr, 201 const unsigned char *BufEnd) { 202 // See if you can find the hidden message in the magic bytes :-). 203 // (Hint: it's a little-endian encoding.) 204 return BufPtr != BufEnd && 205 BufPtr[0] == 0xDE && 206 BufPtr[1] == 0xC0 && 207 BufPtr[2] == 0x17 && 208 BufPtr[3] == 0x0B; 209 } 210 211 /// isRawBitcode - Return true if the given bytes are the magic bytes for 212 /// raw LLVM IR bitcode (without a wrapper). 213 inline bool isRawBitcode(const unsigned char *BufPtr, 214 const unsigned char *BufEnd) { 215 // These bytes sort of have a hidden message, but it's not in 216 // little-endian this time, and it's a little redundant. 217 return BufPtr != BufEnd && 218 BufPtr[0] == 'B' && 219 BufPtr[1] == 'C' && 220 BufPtr[2] == 0xc0 && 221 BufPtr[3] == 0xde; 222 } 223 224 /// isBitcode - Return true if the given bytes are the magic bytes for 225 /// LLVM IR bitcode, either with or without a wrapper. 226 inline bool isBitcode(const unsigned char *BufPtr, 227 const unsigned char *BufEnd) { 228 return isBitcodeWrapper(BufPtr, BufEnd) || 229 isRawBitcode(BufPtr, BufEnd); 230 } 231 232 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special 233 /// header for padding or other reasons. The format of this header is: 234 /// 235 /// struct bc_header { 236 /// uint32_t Magic; // 0x0B17C0DE 237 /// uint32_t Version; // Version, currently always 0. 238 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. 239 /// uint32_t BitcodeSize; // Size of traditional bitcode file. 240 /// ... potentially other gunk ... 241 /// }; 242 /// 243 /// This function is called when we find a file with a matching magic number. 244 /// In this case, skip down to the subsection of the file that is actually a 245 /// BC file. 246 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to 247 /// contain the whole bitcode file. 248 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, 249 const unsigned char *&BufEnd, 250 bool VerifyBufferSize) { 251 // Must contain the offset and size field! 252 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) 253 return true; 254 255 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 256 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 257 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; 258 259 // Verify that Offset+Size fits in the file. 260 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) 261 return true; 262 BufPtr += Offset; 263 BufEnd = BufPtr+Size; 264 return false; 265 } 266 267 APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits); 268 269 const std::error_category &BitcodeErrorCategory(); 270 enum class BitcodeError { CorruptedBitcode = 1 }; 271 inline std::error_code make_error_code(BitcodeError E) { 272 return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); 273 } 274 275 } // end namespace llvm 276 277 namespace std { 278 279 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; 280 281 } // end namespace std 282 283 #endif // LLVM_BITCODE_BITCODEREADER_H 284