1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header defines interfaces to read LLVM bitcode files/streams. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_BITCODE_BITCODEREADER_H 14 #define LLVM_BITCODE_BITCODEREADER_H 15 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Bitstream/BitCodes.h" 19 #include "llvm/IR/ModuleSummaryIndex.h" 20 #include "llvm/Support/Endian.h" 21 #include "llvm/Support/Error.h" 22 #include "llvm/Support/ErrorOr.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include <cstdint> 25 #include <memory> 26 #include <string> 27 #include <system_error> 28 #include <vector> 29 namespace llvm { 30 31 class LLVMContext; 32 class Module; 33 34 typedef llvm::function_ref<Optional<std::string>(StringRef)> 35 DataLayoutCallbackTy; 36 37 // These functions are for converting Expected/Error values to 38 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: 39 // Remove these functions once no longer needed by the C and libLTO APIs. 40 41 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); 42 43 template <typename T> expectedToErrorOrAndEmitErrors(LLVMContext & Ctx,Expected<T> Val)44 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { 45 if (!Val) 46 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); 47 return std::move(*Val); 48 } 49 50 struct BitcodeFileContents; 51 52 /// Basic information extracted from a bitcode module to be used for LTO. 53 struct BitcodeLTOInfo { 54 bool IsThinLTO; 55 bool HasSummary; 56 bool EnableSplitLTOUnit; 57 }; 58 59 /// Represents a module in a bitcode file. 60 class BitcodeModule { 61 // This covers the identification (if present) and module blocks. 62 ArrayRef<uint8_t> Buffer; 63 StringRef ModuleIdentifier; 64 65 // The string table used to interpret this module. 66 StringRef Strtab; 67 68 // The bitstream location of the IDENTIFICATION_BLOCK. 69 uint64_t IdentificationBit; 70 71 // The bitstream location of this module's MODULE_BLOCK. 72 uint64_t ModuleBit; 73 BitcodeModule(ArrayRef<uint8_t> Buffer,StringRef ModuleIdentifier,uint64_t IdentificationBit,uint64_t ModuleBit)74 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, 75 uint64_t IdentificationBit, uint64_t ModuleBit) 76 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), 77 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} 78 79 // Calls the ctor. 80 friend Expected<BitcodeFileContents> 81 getBitcodeFileContents(MemoryBufferRef Buffer); 82 83 Expected<std::unique_ptr<Module>> 84 getModuleImpl(LLVMContext &Context, bool MaterializeAll, 85 bool ShouldLazyLoadMetadata, bool IsImporting, 86 DataLayoutCallbackTy DataLayoutCallback); 87 88 public: getBuffer()89 StringRef getBuffer() const { 90 return StringRef((const char *)Buffer.begin(), Buffer.size()); 91 } 92 getStrtab()93 StringRef getStrtab() const { return Strtab; } 94 getModuleIdentifier()95 StringRef getModuleIdentifier() const { return ModuleIdentifier; } 96 97 /// Read the bitcode module and prepare for lazy deserialization of function 98 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. 99 /// If IsImporting is true, this module is being parsed for ThinLTO 100 /// importing into another module. 101 Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context, 102 bool ShouldLazyLoadMetadata, 103 bool IsImporting); 104 105 /// Read the entire bitcode module and return it. 106 Expected<std::unique_ptr<Module>> parseModule( 107 LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback = 108 [](StringRef) { return None; }); 109 110 /// Returns information about the module to be used for LTO: whether to 111 /// compile with ThinLTO, and whether it has a summary. 112 Expected<BitcodeLTOInfo> getLTOInfo(); 113 114 /// Parse the specified bitcode buffer, returning the module summary index. 115 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); 116 117 /// Parse the specified bitcode buffer and merge its module summary index 118 /// into CombinedIndex. 119 Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, 120 uint64_t ModuleId); 121 }; 122 123 struct BitcodeFileContents { 124 std::vector<BitcodeModule> Mods; 125 StringRef Symtab, StrtabForSymtab; 126 }; 127 128 /// Returns the contents of a bitcode file. This includes the raw contents of 129 /// the symbol table embedded in the bitcode file. Clients which require a 130 /// symbol table should prefer to use irsymtab::read instead of this function 131 /// because it creates a reader for the irsymtab and handles upgrading bitcode 132 /// files without a symbol table or with an old symbol table. 133 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); 134 135 /// Returns a list of modules in the specified bitcode buffer. 136 Expected<std::vector<BitcodeModule>> 137 getBitcodeModuleList(MemoryBufferRef Buffer); 138 139 /// Read the header of the specified bitcode buffer and prepare for lazy 140 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, 141 /// lazily load metadata as well. If IsImporting is true, this module is 142 /// being parsed for ThinLTO importing into another module. 143 Expected<std::unique_ptr<Module>> 144 getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, 145 bool ShouldLazyLoadMetadata = false, 146 bool IsImporting = false); 147 148 /// Like getLazyBitcodeModule, except that the module takes ownership of 149 /// the memory buffer if successful. If successful, this moves Buffer. On 150 /// error, this *does not* move Buffer. If IsImporting is true, this module is 151 /// being parsed for ThinLTO importing into another module. 152 Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( 153 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, 154 bool ShouldLazyLoadMetadata = false, bool IsImporting = false); 155 156 /// Read the header of the specified bitcode buffer and extract just the 157 /// triple information. If successful, this returns a string. On error, this 158 /// returns "". 159 Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); 160 161 /// Return true if \p Buffer contains a bitcode file with ObjC code (category 162 /// or class) in it. 163 Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); 164 165 /// Read the header of the specified bitcode buffer and extract just the 166 /// producer string information. If successful, this returns a string. On 167 /// error, this returns "". 168 Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); 169 170 /// Read the specified bitcode file, returning the module. 171 Expected<std::unique_ptr<Module>> parseBitcodeFile( 172 MemoryBufferRef Buffer, LLVMContext &Context, 173 DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { 174 return None; 175 }); 176 177 /// Returns LTO information for the specified bitcode file. 178 Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer); 179 180 /// Parse the specified bitcode buffer, returning the module summary index. 181 Expected<std::unique_ptr<ModuleSummaryIndex>> 182 getModuleSummaryIndex(MemoryBufferRef Buffer); 183 184 /// Parse the specified bitcode buffer and merge the index into CombinedIndex. 185 Error readModuleSummaryIndex(MemoryBufferRef Buffer, 186 ModuleSummaryIndex &CombinedIndex, 187 uint64_t ModuleId); 188 189 /// Parse the module summary index out of an IR file and return the module 190 /// summary index object if found, or an empty summary if not. If Path refers 191 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then 192 /// this function will return nullptr. 193 Expected<std::unique_ptr<ModuleSummaryIndex>> 194 getModuleSummaryIndexForFile(StringRef Path, 195 bool IgnoreEmptyThinLTOIndexFile = false); 196 197 /// isBitcodeWrapper - Return true if the given bytes are the magic bytes 198 /// for an LLVM IR bitcode wrapper. isBitcodeWrapper(const unsigned char * BufPtr,const unsigned char * BufEnd)199 inline bool isBitcodeWrapper(const unsigned char *BufPtr, 200 const unsigned char *BufEnd) { 201 // See if you can find the hidden message in the magic bytes :-). 202 // (Hint: it's a little-endian encoding.) 203 return BufPtr != BufEnd && 204 BufPtr[0] == 0xDE && 205 BufPtr[1] == 0xC0 && 206 BufPtr[2] == 0x17 && 207 BufPtr[3] == 0x0B; 208 } 209 210 /// isRawBitcode - Return true if the given bytes are the magic bytes for 211 /// raw LLVM IR bitcode (without a wrapper). isRawBitcode(const unsigned char * BufPtr,const unsigned char * BufEnd)212 inline bool isRawBitcode(const unsigned char *BufPtr, 213 const unsigned char *BufEnd) { 214 // These bytes sort of have a hidden message, but it's not in 215 // little-endian this time, and it's a little redundant. 216 return BufPtr != BufEnd && 217 BufPtr[0] == 'B' && 218 BufPtr[1] == 'C' && 219 BufPtr[2] == 0xc0 && 220 BufPtr[3] == 0xde; 221 } 222 223 /// isBitcode - Return true if the given bytes are the magic bytes for 224 /// LLVM IR bitcode, either with or without a wrapper. isBitcode(const unsigned char * BufPtr,const unsigned char * BufEnd)225 inline bool isBitcode(const unsigned char *BufPtr, 226 const unsigned char *BufEnd) { 227 return isBitcodeWrapper(BufPtr, BufEnd) || 228 isRawBitcode(BufPtr, BufEnd); 229 } 230 231 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special 232 /// header for padding or other reasons. The format of this header is: 233 /// 234 /// struct bc_header { 235 /// uint32_t Magic; // 0x0B17C0DE 236 /// uint32_t Version; // Version, currently always 0. 237 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. 238 /// uint32_t BitcodeSize; // Size of traditional bitcode file. 239 /// ... potentially other gunk ... 240 /// }; 241 /// 242 /// This function is called when we find a file with a matching magic number. 243 /// In this case, skip down to the subsection of the file that is actually a 244 /// BC file. 245 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to 246 /// contain the whole bitcode file. SkipBitcodeWrapperHeader(const unsigned char * & BufPtr,const unsigned char * & BufEnd,bool VerifyBufferSize)247 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, 248 const unsigned char *&BufEnd, 249 bool VerifyBufferSize) { 250 // Must contain the offset and size field! 251 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) 252 return true; 253 254 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 255 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 256 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; 257 258 // Verify that Offset+Size fits in the file. 259 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) 260 return true; 261 BufPtr += Offset; 262 BufEnd = BufPtr+Size; 263 return false; 264 } 265 266 APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits); 267 268 const std::error_category &BitcodeErrorCategory(); 269 enum class BitcodeError { CorruptedBitcode = 1 }; make_error_code(BitcodeError E)270 inline std::error_code make_error_code(BitcodeError E) { 271 return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); 272 } 273 274 } // end namespace llvm 275 276 namespace std { 277 278 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; 279 280 } // end namespace std 281 282 #endif // LLVM_BITCODE_BITCODEREADER_H 283