1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the Module class, which describes a module that has 10 // been loaded from an AST file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H 15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H 16 17 #include "clang/Basic/FileManager.h" 18 #include "clang/Basic/Module.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Serialization/ASTBitCodes.h" 21 #include "clang/Serialization/ContinuousRangeMap.h" 22 #include "clang/Serialization/ModuleFileExtension.h" 23 #include "llvm/ADT/BitVector.h" 24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/ADT/PointerIntPair.h" 26 #include "llvm/ADT/SetVector.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/StringRef.h" 29 #include "llvm/Bitstream/BitstreamReader.h" 30 #include "llvm/Support/Endian.h" 31 #include <cassert> 32 #include <cstdint> 33 #include <memory> 34 #include <string> 35 #include <vector> 36 37 namespace clang { 38 39 namespace serialization { 40 41 /// Specifies the kind of module that has been loaded. 42 enum ModuleKind { 43 /// File is an implicitly-loaded module. 44 MK_ImplicitModule, 45 46 /// File is an explicitly-loaded module. 47 MK_ExplicitModule, 48 49 /// File is a PCH file treated as such. 50 MK_PCH, 51 52 /// File is a PCH file treated as the preamble. 53 MK_Preamble, 54 55 /// File is a PCH file treated as the actual main file. 56 MK_MainFile, 57 58 /// File is from a prebuilt module path. 59 MK_PrebuiltModule 60 }; 61 62 /// The input file that has been loaded from this AST file, along with 63 /// bools indicating whether this was an overridden buffer or if it was 64 /// out-of-date or not-found. 65 class InputFile { 66 enum { 67 Overridden = 1, 68 OutOfDate = 2, 69 NotFound = 3 70 }; 71 llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val; 72 73 public: 74 InputFile() = default; 75 76 InputFile(FileEntryRef File, bool isOverridden = false, 77 bool isOutOfDate = false) { 78 assert(!(isOverridden && isOutOfDate) && 79 "an overridden cannot be out-of-date"); 80 unsigned intVal = 0; 81 if (isOverridden) 82 intVal = Overridden; 83 else if (isOutOfDate) 84 intVal = OutOfDate; 85 Val.setPointerAndInt(&File.getMapEntry(), intVal); 86 } 87 88 static InputFile getNotFound() { 89 InputFile File; 90 File.Val.setInt(NotFound); 91 return File; 92 } 93 94 OptionalFileEntryRefDegradesToFileEntryPtr getFile() const { 95 if (auto *P = Val.getPointer()) 96 return FileEntryRef(*P); 97 return None; 98 } 99 bool isOverridden() const { return Val.getInt() == Overridden; } 100 bool isOutOfDate() const { return Val.getInt() == OutOfDate; } 101 bool isNotFound() const { return Val.getInt() == NotFound; } 102 }; 103 104 /// Information about a module that has been loaded by the ASTReader. 105 /// 106 /// Each instance of the Module class corresponds to a single AST file, which 107 /// may be a precompiled header, precompiled preamble, a module, or an AST file 108 /// of some sort loaded as the main file, all of which are specific formulations 109 /// of the general notion of a "module". A module may depend on any number of 110 /// other modules. 111 class ModuleFile { 112 public: 113 ModuleFile(ModuleKind Kind, unsigned Generation) 114 : Kind(Kind), Generation(Generation) {} 115 ~ModuleFile(); 116 117 // === General information === 118 119 /// The index of this module in the list of modules. 120 unsigned Index = 0; 121 122 /// The type of this module. 123 ModuleKind Kind; 124 125 /// The file name of the module file. 126 std::string FileName; 127 128 /// The name of the module. 129 std::string ModuleName; 130 131 /// The base directory of the module. 132 std::string BaseDirectory; 133 134 std::string getTimestampFilename() const { 135 return FileName + ".timestamp"; 136 } 137 138 /// The original source file name that was used to build the 139 /// primary AST file, which may have been modified for 140 /// relocatable-pch support. 141 std::string OriginalSourceFileName; 142 143 /// The actual original source file name that was used to 144 /// build this AST file. 145 std::string ActualOriginalSourceFileName; 146 147 /// The file ID for the original source file that was used to 148 /// build this AST file. 149 FileID OriginalSourceFileID; 150 151 /// The directory that the PCH was originally created in. Used to 152 /// allow resolving headers even after headers+PCH was moved to a new path. 153 std::string OriginalDir; 154 155 std::string ModuleMapPath; 156 157 /// Whether this precompiled header is a relocatable PCH file. 158 bool RelocatablePCH = false; 159 160 /// Whether timestamps are included in this module file. 161 bool HasTimestamps = false; 162 163 /// Whether the top-level module has been read from the AST file. 164 bool DidReadTopLevelSubmodule = false; 165 166 /// The file entry for the module file. 167 OptionalFileEntryRefDegradesToFileEntryPtr File; 168 169 /// The signature of the module file, which may be used instead of the size 170 /// and modification time to identify this particular file. 171 ASTFileSignature Signature; 172 173 /// The signature of the AST block of the module file, this can be used to 174 /// unique module files based on AST contents. 175 ASTFileSignature ASTBlockHash; 176 177 /// The bit vector denoting usage of each header search entry (true = used). 178 llvm::BitVector SearchPathUsage; 179 180 /// Whether this module has been directly imported by the 181 /// user. 182 bool DirectlyImported = false; 183 184 /// The generation of which this module file is a part. 185 unsigned Generation; 186 187 /// The memory buffer that stores the data associated with 188 /// this AST file, owned by the InMemoryModuleCache. 189 llvm::MemoryBuffer *Buffer; 190 191 /// The size of this file, in bits. 192 uint64_t SizeInBits = 0; 193 194 /// The global bit offset (or base) of this module 195 uint64_t GlobalBitOffset = 0; 196 197 /// The bit offset of the AST block of this module. 198 uint64_t ASTBlockStartOffset = 0; 199 200 /// The serialized bitstream data for this file. 201 StringRef Data; 202 203 /// The main bitstream cursor for the main block. 204 llvm::BitstreamCursor Stream; 205 206 /// The source location where the module was explicitly or implicitly 207 /// imported in the local translation unit. 208 /// 209 /// If module A depends on and imports module B, both modules will have the 210 /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a 211 /// source location inside module A). 212 /// 213 /// WARNING: This is largely useless. It doesn't tell you when a module was 214 /// made visible, just when the first submodule of that module was imported. 215 SourceLocation DirectImportLoc; 216 217 /// The source location where this module was first imported. 218 SourceLocation ImportLoc; 219 220 /// The first source location in this module. 221 SourceLocation FirstLoc; 222 223 /// The list of extension readers that are attached to this module 224 /// file. 225 std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders; 226 227 /// The module offset map data for this file. If non-empty, the various 228 /// ContinuousRangeMaps described below have not yet been populated. 229 StringRef ModuleOffsetMap; 230 231 // === Input Files === 232 233 /// The cursor to the start of the input-files block. 234 llvm::BitstreamCursor InputFilesCursor; 235 236 /// Offsets for all of the input file entries in the AST file. 237 const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr; 238 239 /// The input files that have been loaded from this AST file. 240 std::vector<InputFile> InputFilesLoaded; 241 242 // All user input files reside at the index range [0, NumUserInputFiles), and 243 // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()). 244 unsigned NumUserInputFiles = 0; 245 246 /// If non-zero, specifies the time when we last validated input 247 /// files. Zero means we never validated them. 248 /// 249 /// The time is specified in seconds since the start of the Epoch. 250 uint64_t InputFilesValidationTimestamp = 0; 251 252 // === Source Locations === 253 254 /// Cursor used to read source location entries. 255 llvm::BitstreamCursor SLocEntryCursor; 256 257 /// The bit offset to the start of the SOURCE_MANAGER_BLOCK. 258 uint64_t SourceManagerBlockStartOffset = 0; 259 260 /// The number of source location entries in this AST file. 261 unsigned LocalNumSLocEntries = 0; 262 263 /// The base ID in the source manager's view of this module. 264 int SLocEntryBaseID = 0; 265 266 /// The base offset in the source manager's view of this module. 267 SourceLocation::UIntTy SLocEntryBaseOffset = 0; 268 269 /// Base file offset for the offsets in SLocEntryOffsets. Real file offset 270 /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. 271 uint64_t SLocEntryOffsetsBase = 0; 272 273 /// Offsets for all of the source location entries in the 274 /// AST file. 275 const uint32_t *SLocEntryOffsets = nullptr; 276 277 /// SLocEntries that we're going to preload. 278 SmallVector<uint64_t, 4> PreloadSLocEntries; 279 280 /// Remapping table for source locations in this module. 281 ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2> 282 SLocRemap; 283 284 // === Identifiers === 285 286 /// The number of identifiers in this AST file. 287 unsigned LocalNumIdentifiers = 0; 288 289 /// Offsets into the identifier table data. 290 /// 291 /// This array is indexed by the identifier ID (-1), and provides 292 /// the offset into IdentifierTableData where the string data is 293 /// stored. 294 const uint32_t *IdentifierOffsets = nullptr; 295 296 /// Base identifier ID for identifiers local to this module. 297 serialization::IdentID BaseIdentifierID = 0; 298 299 /// Remapping table for identifier IDs in this module. 300 ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap; 301 302 /// Actual data for the on-disk hash table of identifiers. 303 /// 304 /// This pointer points into a memory buffer, where the on-disk hash 305 /// table for identifiers actually lives. 306 const unsigned char *IdentifierTableData = nullptr; 307 308 /// A pointer to an on-disk hash table of opaque type 309 /// IdentifierHashTable. 310 void *IdentifierLookupTable = nullptr; 311 312 /// Offsets of identifiers that we're going to preload within 313 /// IdentifierTableData. 314 std::vector<unsigned> PreloadIdentifierOffsets; 315 316 // === Macros === 317 318 /// The cursor to the start of the preprocessor block, which stores 319 /// all of the macro definitions. 320 llvm::BitstreamCursor MacroCursor; 321 322 /// The number of macros in this AST file. 323 unsigned LocalNumMacros = 0; 324 325 /// Base file offset for the offsets in MacroOffsets. Real file offset for 326 /// the entry is MacroOffsetsBase + MacroOffsets[i]. 327 uint64_t MacroOffsetsBase = 0; 328 329 /// Offsets of macros in the preprocessor block. 330 /// 331 /// This array is indexed by the macro ID (-1), and provides 332 /// the offset into the preprocessor block where macro definitions are 333 /// stored. 334 const uint32_t *MacroOffsets = nullptr; 335 336 /// Base macro ID for macros local to this module. 337 serialization::MacroID BaseMacroID = 0; 338 339 /// Remapping table for macro IDs in this module. 340 ContinuousRangeMap<uint32_t, int, 2> MacroRemap; 341 342 /// The offset of the start of the set of defined macros. 343 uint64_t MacroStartOffset = 0; 344 345 // === Detailed PreprocessingRecord === 346 347 /// The cursor to the start of the (optional) detailed preprocessing 348 /// record block. 349 llvm::BitstreamCursor PreprocessorDetailCursor; 350 351 /// The offset of the start of the preprocessor detail cursor. 352 uint64_t PreprocessorDetailStartOffset = 0; 353 354 /// Base preprocessed entity ID for preprocessed entities local to 355 /// this module. 356 serialization::PreprocessedEntityID BasePreprocessedEntityID = 0; 357 358 /// Remapping table for preprocessed entity IDs in this module. 359 ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap; 360 361 const PPEntityOffset *PreprocessedEntityOffsets = nullptr; 362 unsigned NumPreprocessedEntities = 0; 363 364 /// Base ID for preprocessed skipped ranges local to this module. 365 unsigned BasePreprocessedSkippedRangeID = 0; 366 367 const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr; 368 unsigned NumPreprocessedSkippedRanges = 0; 369 370 // === Header search information === 371 372 /// The number of local HeaderFileInfo structures. 373 unsigned LocalNumHeaderFileInfos = 0; 374 375 /// Actual data for the on-disk hash table of header file 376 /// information. 377 /// 378 /// This pointer points into a memory buffer, where the on-disk hash 379 /// table for header file information actually lives. 380 const char *HeaderFileInfoTableData = nullptr; 381 382 /// The on-disk hash table that contains information about each of 383 /// the header files. 384 void *HeaderFileInfoTable = nullptr; 385 386 // === Submodule information === 387 388 /// The number of submodules in this module. 389 unsigned LocalNumSubmodules = 0; 390 391 /// Base submodule ID for submodules local to this module. 392 serialization::SubmoduleID BaseSubmoduleID = 0; 393 394 /// Remapping table for submodule IDs in this module. 395 ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap; 396 397 // === Selectors === 398 399 /// The number of selectors new to this file. 400 /// 401 /// This is the number of entries in SelectorOffsets. 402 unsigned LocalNumSelectors = 0; 403 404 /// Offsets into the selector lookup table's data array 405 /// where each selector resides. 406 const uint32_t *SelectorOffsets = nullptr; 407 408 /// Base selector ID for selectors local to this module. 409 serialization::SelectorID BaseSelectorID = 0; 410 411 /// Remapping table for selector IDs in this module. 412 ContinuousRangeMap<uint32_t, int, 2> SelectorRemap; 413 414 /// A pointer to the character data that comprises the selector table 415 /// 416 /// The SelectorOffsets table refers into this memory. 417 const unsigned char *SelectorLookupTableData = nullptr; 418 419 /// A pointer to an on-disk hash table of opaque type 420 /// ASTSelectorLookupTable. 421 /// 422 /// This hash table provides the IDs of all selectors, and the associated 423 /// instance and factory methods. 424 void *SelectorLookupTable = nullptr; 425 426 // === Declarations === 427 428 /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block. 429 /// It has read all the abbreviations at the start of the block and is ready 430 /// to jump around with these in context. 431 llvm::BitstreamCursor DeclsCursor; 432 433 /// The offset to the start of the DECLTYPES_BLOCK block. 434 uint64_t DeclsBlockStartOffset = 0; 435 436 /// The number of declarations in this AST file. 437 unsigned LocalNumDecls = 0; 438 439 /// Offset of each declaration within the bitstream, indexed 440 /// by the declaration ID (-1). 441 const DeclOffset *DeclOffsets = nullptr; 442 443 /// Base declaration ID for declarations local to this module. 444 serialization::DeclID BaseDeclID = 0; 445 446 /// Remapping table for declaration IDs in this module. 447 ContinuousRangeMap<uint32_t, int, 2> DeclRemap; 448 449 /// Mapping from the module files that this module file depends on 450 /// to the base declaration ID for that module as it is understood within this 451 /// module. 452 /// 453 /// This is effectively a reverse global-to-local mapping for declaration 454 /// IDs, so that we can interpret a true global ID (for this translation unit) 455 /// as a local ID (for this module file). 456 llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs; 457 458 /// Array of file-level DeclIDs sorted by file. 459 const serialization::DeclID *FileSortedDecls = nullptr; 460 unsigned NumFileSortedDecls = 0; 461 462 /// Array of category list location information within this 463 /// module file, sorted by the definition ID. 464 const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr; 465 466 /// The number of redeclaration info entries in ObjCCategoriesMap. 467 unsigned LocalNumObjCCategoriesInMap = 0; 468 469 /// The Objective-C category lists for categories known to this 470 /// module. 471 SmallVector<uint64_t, 1> ObjCCategories; 472 473 // === Types === 474 475 /// The number of types in this AST file. 476 unsigned LocalNumTypes = 0; 477 478 /// Offset of each type within the bitstream, indexed by the 479 /// type ID, or the representation of a Type*. 480 const UnderalignedInt64 *TypeOffsets = nullptr; 481 482 /// Base type ID for types local to this module as represented in 483 /// the global type ID space. 484 serialization::TypeID BaseTypeIndex = 0; 485 486 /// Remapping table for type IDs in this module. 487 ContinuousRangeMap<uint32_t, int, 2> TypeRemap; 488 489 // === Miscellaneous === 490 491 /// Diagnostic IDs and their mappings that the user changed. 492 SmallVector<uint64_t, 8> PragmaDiagMappings; 493 494 /// List of modules which depend on this module 495 llvm::SetVector<ModuleFile *> ImportedBy; 496 497 /// List of modules which this module depends on 498 llvm::SetVector<ModuleFile *> Imports; 499 500 /// Determine whether this module was directly imported at 501 /// any point during translation. 502 bool isDirectlyImported() const { return DirectlyImported; } 503 504 /// Is this a module file for a module (rather than a PCH or similar). 505 bool isModule() const { 506 return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule || 507 Kind == MK_PrebuiltModule; 508 } 509 510 /// Dump debugging output for this module. 511 void dump(); 512 }; 513 514 } // namespace serialization 515 516 } // namespace clang 517 518 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H 519