1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the Module class, which describes a module that has 10 // been loaded from an AST file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H 15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H 16 17 #include "clang/Basic/FileManager.h" 18 #include "clang/Basic/Module.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Serialization/ASTBitCodes.h" 21 #include "clang/Serialization/ContinuousRangeMap.h" 22 #include "clang/Serialization/ModuleFileExtension.h" 23 #include "llvm/ADT/BitVector.h" 24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/ADT/PointerIntPair.h" 26 #include "llvm/ADT/SetVector.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/StringRef.h" 29 #include "llvm/Bitstream/BitstreamReader.h" 30 #include "llvm/Support/Endian.h" 31 #include <cassert> 32 #include <cstdint> 33 #include <memory> 34 #include <string> 35 #include <vector> 36 37 namespace clang { 38 39 namespace serialization { 40 41 /// Specifies the kind of module that has been loaded. 42 enum ModuleKind { 43 /// File is an implicitly-loaded module. 44 MK_ImplicitModule, 45 46 /// File is an explicitly-loaded module. 47 MK_ExplicitModule, 48 49 /// File is a PCH file treated as such. 50 MK_PCH, 51 52 /// File is a PCH file treated as the preamble. 53 MK_Preamble, 54 55 /// File is a PCH file treated as the actual main file. 56 MK_MainFile, 57 58 /// File is from a prebuilt module path. 59 MK_PrebuiltModule 60 }; 61 62 /// The input file info that has been loaded from an AST file. 63 struct InputFileInfo { 64 std::string Filename; 65 uint64_t ContentHash; 66 off_t StoredSize; 67 time_t StoredTime; 68 bool Overridden; 69 bool Transient; 70 bool TopLevelModuleMap; 71 }; 72 73 /// The input file that has been loaded from this AST file, along with 74 /// bools indicating whether this was an overridden buffer or if it was 75 /// out-of-date or not-found. 76 class InputFile { 77 enum { 78 Overridden = 1, 79 OutOfDate = 2, 80 NotFound = 3 81 }; 82 llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val; 83 84 public: 85 InputFile() = default; 86 87 InputFile(FileEntryRef File, bool isOverridden = false, 88 bool isOutOfDate = false) { 89 assert(!(isOverridden && isOutOfDate) && 90 "an overridden cannot be out-of-date"); 91 unsigned intVal = 0; 92 if (isOverridden) 93 intVal = Overridden; 94 else if (isOutOfDate) 95 intVal = OutOfDate; 96 Val.setPointerAndInt(&File.getMapEntry(), intVal); 97 } 98 getNotFound()99 static InputFile getNotFound() { 100 InputFile File; 101 File.Val.setInt(NotFound); 102 return File; 103 } 104 getFile()105 OptionalFileEntryRefDegradesToFileEntryPtr getFile() const { 106 if (auto *P = Val.getPointer()) 107 return FileEntryRef(*P); 108 return std::nullopt; 109 } isOverridden()110 bool isOverridden() const { return Val.getInt() == Overridden; } isOutOfDate()111 bool isOutOfDate() const { return Val.getInt() == OutOfDate; } isNotFound()112 bool isNotFound() const { return Val.getInt() == NotFound; } 113 }; 114 115 /// Information about a module that has been loaded by the ASTReader. 116 /// 117 /// Each instance of the Module class corresponds to a single AST file, which 118 /// may be a precompiled header, precompiled preamble, a module, or an AST file 119 /// of some sort loaded as the main file, all of which are specific formulations 120 /// of the general notion of a "module". A module may depend on any number of 121 /// other modules. 122 class ModuleFile { 123 public: ModuleFile(ModuleKind Kind,unsigned Generation)124 ModuleFile(ModuleKind Kind, unsigned Generation) 125 : Kind(Kind), Generation(Generation) {} 126 ~ModuleFile(); 127 128 // === General information === 129 130 /// The index of this module in the list of modules. 131 unsigned Index = 0; 132 133 /// The type of this module. 134 ModuleKind Kind; 135 136 /// The file name of the module file. 137 std::string FileName; 138 139 /// The name of the module. 140 std::string ModuleName; 141 142 /// The base directory of the module. 143 std::string BaseDirectory; 144 getTimestampFilename()145 std::string getTimestampFilename() const { 146 return FileName + ".timestamp"; 147 } 148 149 /// The original source file name that was used to build the 150 /// primary AST file, which may have been modified for 151 /// relocatable-pch support. 152 std::string OriginalSourceFileName; 153 154 /// The actual original source file name that was used to 155 /// build this AST file. 156 std::string ActualOriginalSourceFileName; 157 158 /// The file ID for the original source file that was used to 159 /// build this AST file. 160 FileID OriginalSourceFileID; 161 162 std::string ModuleMapPath; 163 164 /// Whether this precompiled header is a relocatable PCH file. 165 bool RelocatablePCH = false; 166 167 /// Whether timestamps are included in this module file. 168 bool HasTimestamps = false; 169 170 /// Whether the top-level module has been read from the AST file. 171 bool DidReadTopLevelSubmodule = false; 172 173 /// The file entry for the module file. 174 OptionalFileEntryRefDegradesToFileEntryPtr File; 175 176 /// The signature of the module file, which may be used instead of the size 177 /// and modification time to identify this particular file. 178 ASTFileSignature Signature; 179 180 /// The signature of the AST block of the module file, this can be used to 181 /// unique module files based on AST contents. 182 ASTFileSignature ASTBlockHash; 183 184 /// The bit vector denoting usage of each header search entry (true = used). 185 llvm::BitVector SearchPathUsage; 186 187 /// Whether this module has been directly imported by the 188 /// user. 189 bool DirectlyImported = false; 190 191 /// The generation of which this module file is a part. 192 unsigned Generation; 193 194 /// The memory buffer that stores the data associated with 195 /// this AST file, owned by the InMemoryModuleCache. 196 llvm::MemoryBuffer *Buffer; 197 198 /// The size of this file, in bits. 199 uint64_t SizeInBits = 0; 200 201 /// The global bit offset (or base) of this module 202 uint64_t GlobalBitOffset = 0; 203 204 /// The bit offset of the AST block of this module. 205 uint64_t ASTBlockStartOffset = 0; 206 207 /// The serialized bitstream data for this file. 208 StringRef Data; 209 210 /// The main bitstream cursor for the main block. 211 llvm::BitstreamCursor Stream; 212 213 /// The source location where the module was explicitly or implicitly 214 /// imported in the local translation unit. 215 /// 216 /// If module A depends on and imports module B, both modules will have the 217 /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a 218 /// source location inside module A). 219 /// 220 /// WARNING: This is largely useless. It doesn't tell you when a module was 221 /// made visible, just when the first submodule of that module was imported. 222 SourceLocation DirectImportLoc; 223 224 /// The source location where this module was first imported. 225 SourceLocation ImportLoc; 226 227 /// The first source location in this module. 228 SourceLocation FirstLoc; 229 230 /// The list of extension readers that are attached to this module 231 /// file. 232 std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders; 233 234 /// The module offset map data for this file. If non-empty, the various 235 /// ContinuousRangeMaps described below have not yet been populated. 236 StringRef ModuleOffsetMap; 237 238 // === Input Files === 239 240 /// The cursor to the start of the input-files block. 241 llvm::BitstreamCursor InputFilesCursor; 242 243 /// Offsets for all of the input file entries in the AST file. 244 const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr; 245 246 /// The input files that have been loaded from this AST file. 247 std::vector<InputFile> InputFilesLoaded; 248 249 /// The input file infos that have been loaded from this AST file. 250 std::vector<InputFileInfo> InputFileInfosLoaded; 251 252 // All user input files reside at the index range [0, NumUserInputFiles), and 253 // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()). 254 unsigned NumUserInputFiles = 0; 255 256 /// If non-zero, specifies the time when we last validated input 257 /// files. Zero means we never validated them. 258 /// 259 /// The time is specified in seconds since the start of the Epoch. 260 uint64_t InputFilesValidationTimestamp = 0; 261 262 // === Source Locations === 263 264 /// Cursor used to read source location entries. 265 llvm::BitstreamCursor SLocEntryCursor; 266 267 /// The bit offset to the start of the SOURCE_MANAGER_BLOCK. 268 uint64_t SourceManagerBlockStartOffset = 0; 269 270 /// The number of source location entries in this AST file. 271 unsigned LocalNumSLocEntries = 0; 272 273 /// The base ID in the source manager's view of this module. 274 int SLocEntryBaseID = 0; 275 276 /// The base offset in the source manager's view of this module. 277 SourceLocation::UIntTy SLocEntryBaseOffset = 0; 278 279 /// Base file offset for the offsets in SLocEntryOffsets. Real file offset 280 /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. 281 uint64_t SLocEntryOffsetsBase = 0; 282 283 /// Offsets for all of the source location entries in the 284 /// AST file. 285 const uint32_t *SLocEntryOffsets = nullptr; 286 287 /// SLocEntries that we're going to preload. 288 SmallVector<uint64_t, 4> PreloadSLocEntries; 289 290 /// Remapping table for source locations in this module. 291 ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2> 292 SLocRemap; 293 294 // === Identifiers === 295 296 /// The number of identifiers in this AST file. 297 unsigned LocalNumIdentifiers = 0; 298 299 /// Offsets into the identifier table data. 300 /// 301 /// This array is indexed by the identifier ID (-1), and provides 302 /// the offset into IdentifierTableData where the string data is 303 /// stored. 304 const uint32_t *IdentifierOffsets = nullptr; 305 306 /// Base identifier ID for identifiers local to this module. 307 serialization::IdentID BaseIdentifierID = 0; 308 309 /// Remapping table for identifier IDs in this module. 310 ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap; 311 312 /// Actual data for the on-disk hash table of identifiers. 313 /// 314 /// This pointer points into a memory buffer, where the on-disk hash 315 /// table for identifiers actually lives. 316 const unsigned char *IdentifierTableData = nullptr; 317 318 /// A pointer to an on-disk hash table of opaque type 319 /// IdentifierHashTable. 320 void *IdentifierLookupTable = nullptr; 321 322 /// Offsets of identifiers that we're going to preload within 323 /// IdentifierTableData. 324 std::vector<unsigned> PreloadIdentifierOffsets; 325 326 // === Macros === 327 328 /// The cursor to the start of the preprocessor block, which stores 329 /// all of the macro definitions. 330 llvm::BitstreamCursor MacroCursor; 331 332 /// The number of macros in this AST file. 333 unsigned LocalNumMacros = 0; 334 335 /// Base file offset for the offsets in MacroOffsets. Real file offset for 336 /// the entry is MacroOffsetsBase + MacroOffsets[i]. 337 uint64_t MacroOffsetsBase = 0; 338 339 /// Offsets of macros in the preprocessor block. 340 /// 341 /// This array is indexed by the macro ID (-1), and provides 342 /// the offset into the preprocessor block where macro definitions are 343 /// stored. 344 const uint32_t *MacroOffsets = nullptr; 345 346 /// Base macro ID for macros local to this module. 347 serialization::MacroID BaseMacroID = 0; 348 349 /// Remapping table for macro IDs in this module. 350 ContinuousRangeMap<uint32_t, int, 2> MacroRemap; 351 352 /// The offset of the start of the set of defined macros. 353 uint64_t MacroStartOffset = 0; 354 355 // === Detailed PreprocessingRecord === 356 357 /// The cursor to the start of the (optional) detailed preprocessing 358 /// record block. 359 llvm::BitstreamCursor PreprocessorDetailCursor; 360 361 /// The offset of the start of the preprocessor detail cursor. 362 uint64_t PreprocessorDetailStartOffset = 0; 363 364 /// Base preprocessed entity ID for preprocessed entities local to 365 /// this module. 366 serialization::PreprocessedEntityID BasePreprocessedEntityID = 0; 367 368 /// Remapping table for preprocessed entity IDs in this module. 369 ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap; 370 371 const PPEntityOffset *PreprocessedEntityOffsets = nullptr; 372 unsigned NumPreprocessedEntities = 0; 373 374 /// Base ID for preprocessed skipped ranges local to this module. 375 unsigned BasePreprocessedSkippedRangeID = 0; 376 377 const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr; 378 unsigned NumPreprocessedSkippedRanges = 0; 379 380 // === Header search information === 381 382 /// The number of local HeaderFileInfo structures. 383 unsigned LocalNumHeaderFileInfos = 0; 384 385 /// Actual data for the on-disk hash table of header file 386 /// information. 387 /// 388 /// This pointer points into a memory buffer, where the on-disk hash 389 /// table for header file information actually lives. 390 const char *HeaderFileInfoTableData = nullptr; 391 392 /// The on-disk hash table that contains information about each of 393 /// the header files. 394 void *HeaderFileInfoTable = nullptr; 395 396 // === Submodule information === 397 398 /// The number of submodules in this module. 399 unsigned LocalNumSubmodules = 0; 400 401 /// Base submodule ID for submodules local to this module. 402 serialization::SubmoduleID BaseSubmoduleID = 0; 403 404 /// Remapping table for submodule IDs in this module. 405 ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap; 406 407 // === Selectors === 408 409 /// The number of selectors new to this file. 410 /// 411 /// This is the number of entries in SelectorOffsets. 412 unsigned LocalNumSelectors = 0; 413 414 /// Offsets into the selector lookup table's data array 415 /// where each selector resides. 416 const uint32_t *SelectorOffsets = nullptr; 417 418 /// Base selector ID for selectors local to this module. 419 serialization::SelectorID BaseSelectorID = 0; 420 421 /// Remapping table for selector IDs in this module. 422 ContinuousRangeMap<uint32_t, int, 2> SelectorRemap; 423 424 /// A pointer to the character data that comprises the selector table 425 /// 426 /// The SelectorOffsets table refers into this memory. 427 const unsigned char *SelectorLookupTableData = nullptr; 428 429 /// A pointer to an on-disk hash table of opaque type 430 /// ASTSelectorLookupTable. 431 /// 432 /// This hash table provides the IDs of all selectors, and the associated 433 /// instance and factory methods. 434 void *SelectorLookupTable = nullptr; 435 436 // === Declarations === 437 438 /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block. 439 /// It has read all the abbreviations at the start of the block and is ready 440 /// to jump around with these in context. 441 llvm::BitstreamCursor DeclsCursor; 442 443 /// The offset to the start of the DECLTYPES_BLOCK block. 444 uint64_t DeclsBlockStartOffset = 0; 445 446 /// The number of declarations in this AST file. 447 unsigned LocalNumDecls = 0; 448 449 /// Offset of each declaration within the bitstream, indexed 450 /// by the declaration ID (-1). 451 const DeclOffset *DeclOffsets = nullptr; 452 453 /// Base declaration ID for declarations local to this module. 454 serialization::DeclID BaseDeclID = 0; 455 456 /// Remapping table for declaration IDs in this module. 457 ContinuousRangeMap<uint32_t, int, 2> DeclRemap; 458 459 /// Mapping from the module files that this module file depends on 460 /// to the base declaration ID for that module as it is understood within this 461 /// module. 462 /// 463 /// This is effectively a reverse global-to-local mapping for declaration 464 /// IDs, so that we can interpret a true global ID (for this translation unit) 465 /// as a local ID (for this module file). 466 llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs; 467 468 /// Array of file-level DeclIDs sorted by file. 469 const serialization::DeclID *FileSortedDecls = nullptr; 470 unsigned NumFileSortedDecls = 0; 471 472 /// Array of category list location information within this 473 /// module file, sorted by the definition ID. 474 const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr; 475 476 /// The number of redeclaration info entries in ObjCCategoriesMap. 477 unsigned LocalNumObjCCategoriesInMap = 0; 478 479 /// The Objective-C category lists for categories known to this 480 /// module. 481 SmallVector<uint64_t, 1> ObjCCategories; 482 483 // === Types === 484 485 /// The number of types in this AST file. 486 unsigned LocalNumTypes = 0; 487 488 /// Offset of each type within the bitstream, indexed by the 489 /// type ID, or the representation of a Type*. 490 const UnderalignedInt64 *TypeOffsets = nullptr; 491 492 /// Base type ID for types local to this module as represented in 493 /// the global type ID space. 494 serialization::TypeID BaseTypeIndex = 0; 495 496 /// Remapping table for type IDs in this module. 497 ContinuousRangeMap<uint32_t, int, 2> TypeRemap; 498 499 // === Miscellaneous === 500 501 /// Diagnostic IDs and their mappings that the user changed. 502 SmallVector<uint64_t, 8> PragmaDiagMappings; 503 504 /// List of modules which depend on this module 505 llvm::SetVector<ModuleFile *> ImportedBy; 506 507 /// List of modules which this module depends on 508 llvm::SetVector<ModuleFile *> Imports; 509 510 /// Determine whether this module was directly imported at 511 /// any point during translation. isDirectlyImported()512 bool isDirectlyImported() const { return DirectlyImported; } 513 514 /// Is this a module file for a module (rather than a PCH or similar). isModule()515 bool isModule() const { 516 return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule || 517 Kind == MK_PrebuiltModule; 518 } 519 520 /// Dump debugging output for this module. 521 void dump(); 522 }; 523 524 } // namespace serialization 525 526 } // namespace clang 527 528 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H 529