1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
//  This file defines the ModuleFile class, which describes a module that has
//  been loaded from an AST file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H
15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H
16 
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/Module.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Serialization/ASTBitCodes.h"
21 #include "clang/Serialization/ContinuousRangeMap.h"
22 #include "clang/Serialization/ModuleFileExtension.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/PointerIntPair.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Bitstream/BitstreamReader.h"
30 #include "llvm/Support/Endian.h"
31 #include <cassert>
32 #include <cstdint>
33 #include <memory>
34 #include <string>
35 #include <vector>
36 
37 namespace clang {
38 
39 namespace serialization {
40 
/// Identifies what kind of module file has been loaded.
enum ModuleKind {
  /// The file is a module that was loaded implicitly.
  MK_ImplicitModule = 0,

  /// The file is a module that was loaded explicitly.
  MK_ExplicitModule = 1,

  /// The file is a PCH file and is treated as one.
  MK_PCH = 2,

  /// The file is a PCH file that is treated as the preamble.
  MK_Preamble = 3,

  /// The file is a PCH file that is treated as the actual main file.
  MK_MainFile = 4,

  /// The file comes from a prebuilt module path.
  MK_PrebuiltModule = 5
};
61 
/// The input file info that has been loaded from an AST file.
///
/// Every scalar member carries a default initializer so that a
/// default-constructed InputFileInfo never exposes indeterminate values. The
/// struct is still an aggregate (C++14 and later), so brace initialization
/// keeps working.
struct InputFileInfo {
  /// Name of the input file.
  std::string Filename;

  /// Hash of the file's contents.
  /// NOTE(review): presumably 0 means "no hash recorded" -- confirm against
  /// the reader/writer.
  uint64_t ContentHash = 0;

  /// File size as stored in the AST file.
  off_t StoredSize = 0;

  /// File time as stored in the AST file (presumably the modification time --
  /// confirm against the writer).
  time_t StoredTime = 0;

  /// Whether the file was recorded as overridden.
  bool Overridden = false;

  /// Whether the file was recorded as transient.
  bool Transient = false;

  /// Whether the file was recorded as a top-level module map.
  bool TopLevelModuleMap = false;
};
72 
73 /// The input file that has been loaded from this AST file, along with
74 /// bools indicating whether this was an overridden buffer or if it was
75 /// out-of-date or not-found.
76 class InputFile {
77   enum {
78     Overridden = 1,
79     OutOfDate = 2,
80     NotFound = 3
81   };
82   llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val;
83 
84 public:
85   InputFile() = default;
86 
87   InputFile(FileEntryRef File, bool isOverridden = false,
88             bool isOutOfDate = false) {
89     assert(!(isOverridden && isOutOfDate) &&
90            "an overridden cannot be out-of-date");
91     unsigned intVal = 0;
92     if (isOverridden)
93       intVal = Overridden;
94     else if (isOutOfDate)
95       intVal = OutOfDate;
96     Val.setPointerAndInt(&File.getMapEntry(), intVal);
97   }
98 
99   static InputFile getNotFound() {
100     InputFile File;
101     File.Val.setInt(NotFound);
102     return File;
103   }
104 
105   OptionalFileEntryRefDegradesToFileEntryPtr getFile() const {
106     if (auto *P = Val.getPointer())
107       return FileEntryRef(*P);
108     return std::nullopt;
109   }
110   bool isOverridden() const { return Val.getInt() == Overridden; }
111   bool isOutOfDate() const { return Val.getInt() == OutOfDate; }
112   bool isNotFound() const { return Val.getInt() == NotFound; }
113 };
114 
115 /// Information about a module that has been loaded by the ASTReader.
116 ///
117 /// Each instance of the Module class corresponds to a single AST file, which
118 /// may be a precompiled header, precompiled preamble, a module, or an AST file
119 /// of some sort loaded as the main file, all of which are specific formulations
120 /// of the general notion of a "module". A module may depend on any number of
121 /// other modules.
122 class ModuleFile {
123 public:
124   ModuleFile(ModuleKind Kind, unsigned Generation)
125       : Kind(Kind), Generation(Generation) {}
126   ~ModuleFile();
127 
128   // === General information ===
129 
130   /// The index of this module in the list of modules.
131   unsigned Index = 0;
132 
133   /// The type of this module.
134   ModuleKind Kind;
135 
136   /// The file name of the module file.
137   std::string FileName;
138 
139   /// The name of the module.
140   std::string ModuleName;
141 
142   /// The base directory of the module.
143   std::string BaseDirectory;
144 
145   std::string getTimestampFilename() const {
146     return FileName + ".timestamp";
147   }
148 
149   /// The original source file name that was used to build the
150   /// primary AST file, which may have been modified for
151   /// relocatable-pch support.
152   std::string OriginalSourceFileName;
153 
154   /// The actual original source file name that was used to
155   /// build this AST file.
156   std::string ActualOriginalSourceFileName;
157 
158   /// The file ID for the original source file that was used to
159   /// build this AST file.
160   FileID OriginalSourceFileID;
161 
162   std::string ModuleMapPath;
163 
164   /// Whether this precompiled header is a relocatable PCH file.
165   bool RelocatablePCH = false;
166 
167   /// Whether this module file is a standard C++ module.
168   bool StandardCXXModule = false;
169 
170   /// Whether timestamps are included in this module file.
171   bool HasTimestamps = false;
172 
173   /// Whether the top-level module has been read from the AST file.
174   bool DidReadTopLevelSubmodule = false;
175 
176   /// The file entry for the module file.
177   OptionalFileEntryRefDegradesToFileEntryPtr File;
178 
179   /// The signature of the module file, which may be used instead of the size
180   /// and modification time to identify this particular file.
181   ASTFileSignature Signature;
182 
183   /// The signature of the AST block of the module file, this can be used to
184   /// unique module files based on AST contents.
185   ASTFileSignature ASTBlockHash;
186 
187   /// The bit vector denoting usage of each header search entry (true = used).
188   llvm::BitVector SearchPathUsage;
189 
190   /// Whether this module has been directly imported by the
191   /// user.
192   bool DirectlyImported = false;
193 
194   /// The generation of which this module file is a part.
195   unsigned Generation;
196 
197   /// The memory buffer that stores the data associated with
198   /// this AST file, owned by the InMemoryModuleCache.
199   llvm::MemoryBuffer *Buffer = nullptr;
200 
201   /// The size of this file, in bits.
202   uint64_t SizeInBits = 0;
203 
204   /// The global bit offset (or base) of this module
205   uint64_t GlobalBitOffset = 0;
206 
207   /// The bit offset of the AST block of this module.
208   uint64_t ASTBlockStartOffset = 0;
209 
210   /// The serialized bitstream data for this file.
211   StringRef Data;
212 
213   /// The main bitstream cursor for the main block.
214   llvm::BitstreamCursor Stream;
215 
216   /// The source location where the module was explicitly or implicitly
217   /// imported in the local translation unit.
218   ///
219   /// If module A depends on and imports module B, both modules will have the
220   /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a
221   /// source location inside module A).
222   ///
223   /// WARNING: This is largely useless. It doesn't tell you when a module was
224   /// made visible, just when the first submodule of that module was imported.
225   SourceLocation DirectImportLoc;
226 
227   /// The source location where this module was first imported.
228   SourceLocation ImportLoc;
229 
230   /// The first source location in this module.
231   SourceLocation FirstLoc;
232 
233   /// The list of extension readers that are attached to this module
234   /// file.
235   std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders;
236 
237   /// The module offset map data for this file. If non-empty, the various
238   /// ContinuousRangeMaps described below have not yet been populated.
239   StringRef ModuleOffsetMap;
240 
241   // === Input Files ===
242 
243   /// The cursor to the start of the input-files block.
244   llvm::BitstreamCursor InputFilesCursor;
245 
246   /// Offsets for all of the input file entries in the AST file.
247   const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
248 
249   /// The input files that have been loaded from this AST file.
250   std::vector<InputFile> InputFilesLoaded;
251 
252   /// The input file infos that have been loaded from this AST file.
253   std::vector<InputFileInfo> InputFileInfosLoaded;
254 
255   // All user input files reside at the index range [0, NumUserInputFiles), and
256   // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()).
257   unsigned NumUserInputFiles = 0;
258 
259   /// If non-zero, specifies the time when we last validated input
260   /// files.  Zero means we never validated them.
261   ///
262   /// The time is specified in seconds since the start of the Epoch.
263   uint64_t InputFilesValidationTimestamp = 0;
264 
265   // === Source Locations ===
266 
267   /// Cursor used to read source location entries.
268   llvm::BitstreamCursor SLocEntryCursor;
269 
270   /// The bit offset to the start of the SOURCE_MANAGER_BLOCK.
271   uint64_t SourceManagerBlockStartOffset = 0;
272 
273   /// The number of source location entries in this AST file.
274   unsigned LocalNumSLocEntries = 0;
275 
276   /// The base ID in the source manager's view of this module.
277   int SLocEntryBaseID = 0;
278 
279   /// The base offset in the source manager's view of this module.
280   SourceLocation::UIntTy SLocEntryBaseOffset = 0;
281 
282   /// Base file offset for the offsets in SLocEntryOffsets. Real file offset
283   /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i].
284   uint64_t SLocEntryOffsetsBase = 0;
285 
286   /// Offsets for all of the source location entries in the
287   /// AST file.
288   const uint32_t *SLocEntryOffsets = nullptr;
289 
290   /// SLocEntries that we're going to preload.
291   SmallVector<uint64_t, 4> PreloadSLocEntries;
292 
293   /// Remapping table for source locations in this module.
294   ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
295       SLocRemap;
296 
297   // === Identifiers ===
298 
299   /// The number of identifiers in this AST file.
300   unsigned LocalNumIdentifiers = 0;
301 
302   /// Offsets into the identifier table data.
303   ///
304   /// This array is indexed by the identifier ID (-1), and provides
305   /// the offset into IdentifierTableData where the string data is
306   /// stored.
307   const uint32_t *IdentifierOffsets = nullptr;
308 
309   /// Base identifier ID for identifiers local to this module.
310   serialization::IdentID BaseIdentifierID = 0;
311 
312   /// Remapping table for identifier IDs in this module.
313   ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap;
314 
315   /// Actual data for the on-disk hash table of identifiers.
316   ///
317   /// This pointer points into a memory buffer, where the on-disk hash
318   /// table for identifiers actually lives.
319   const unsigned char *IdentifierTableData = nullptr;
320 
321   /// A pointer to an on-disk hash table of opaque type
322   /// IdentifierHashTable.
323   void *IdentifierLookupTable = nullptr;
324 
325   /// Offsets of identifiers that we're going to preload within
326   /// IdentifierTableData.
327   std::vector<unsigned> PreloadIdentifierOffsets;
328 
329   // === Macros ===
330 
331   /// The cursor to the start of the preprocessor block, which stores
332   /// all of the macro definitions.
333   llvm::BitstreamCursor MacroCursor;
334 
335   /// The number of macros in this AST file.
336   unsigned LocalNumMacros = 0;
337 
338   /// Base file offset for the offsets in MacroOffsets. Real file offset for
339   /// the entry is MacroOffsetsBase + MacroOffsets[i].
340   uint64_t MacroOffsetsBase = 0;
341 
342   /// Offsets of macros in the preprocessor block.
343   ///
344   /// This array is indexed by the macro ID (-1), and provides
345   /// the offset into the preprocessor block where macro definitions are
346   /// stored.
347   const uint32_t *MacroOffsets = nullptr;
348 
349   /// Base macro ID for macros local to this module.
350   serialization::MacroID BaseMacroID = 0;
351 
352   /// Remapping table for macro IDs in this module.
353   ContinuousRangeMap<uint32_t, int, 2> MacroRemap;
354 
355   /// The offset of the start of the set of defined macros.
356   uint64_t MacroStartOffset = 0;
357 
358   // === Detailed PreprocessingRecord ===
359 
360   /// The cursor to the start of the (optional) detailed preprocessing
361   /// record block.
362   llvm::BitstreamCursor PreprocessorDetailCursor;
363 
364   /// The offset of the start of the preprocessor detail cursor.
365   uint64_t PreprocessorDetailStartOffset = 0;
366 
367   /// Base preprocessed entity ID for preprocessed entities local to
368   /// this module.
369   serialization::PreprocessedEntityID BasePreprocessedEntityID = 0;
370 
371   /// Remapping table for preprocessed entity IDs in this module.
372   ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap;
373 
374   const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
375   unsigned NumPreprocessedEntities = 0;
376 
377   /// Base ID for preprocessed skipped ranges local to this module.
378   unsigned BasePreprocessedSkippedRangeID = 0;
379 
380   const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
381   unsigned NumPreprocessedSkippedRanges = 0;
382 
383   // === Header search information ===
384 
385   /// The number of local HeaderFileInfo structures.
386   unsigned LocalNumHeaderFileInfos = 0;
387 
388   /// Actual data for the on-disk hash table of header file
389   /// information.
390   ///
391   /// This pointer points into a memory buffer, where the on-disk hash
392   /// table for header file information actually lives.
393   const char *HeaderFileInfoTableData = nullptr;
394 
395   /// The on-disk hash table that contains information about each of
396   /// the header files.
397   void *HeaderFileInfoTable = nullptr;
398 
399   // === Submodule information ===
400 
401   /// The number of submodules in this module.
402   unsigned LocalNumSubmodules = 0;
403 
404   /// Base submodule ID for submodules local to this module.
405   serialization::SubmoduleID BaseSubmoduleID = 0;
406 
407   /// Remapping table for submodule IDs in this module.
408   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
409 
410   // === Selectors ===
411 
412   /// The number of selectors new to this file.
413   ///
414   /// This is the number of entries in SelectorOffsets.
415   unsigned LocalNumSelectors = 0;
416 
417   /// Offsets into the selector lookup table's data array
418   /// where each selector resides.
419   const uint32_t *SelectorOffsets = nullptr;
420 
421   /// Base selector ID for selectors local to this module.
422   serialization::SelectorID BaseSelectorID = 0;
423 
424   /// Remapping table for selector IDs in this module.
425   ContinuousRangeMap<uint32_t, int, 2> SelectorRemap;
426 
427   /// A pointer to the character data that comprises the selector table
428   ///
429   /// The SelectorOffsets table refers into this memory.
430   const unsigned char *SelectorLookupTableData = nullptr;
431 
432   /// A pointer to an on-disk hash table of opaque type
433   /// ASTSelectorLookupTable.
434   ///
435   /// This hash table provides the IDs of all selectors, and the associated
436   /// instance and factory methods.
437   void *SelectorLookupTable = nullptr;
438 
439   // === Declarations ===
440 
441   /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block.
442   /// It has read all the abbreviations at the start of the block and is ready
443   /// to jump around with these in context.
444   llvm::BitstreamCursor DeclsCursor;
445 
446   /// The offset to the start of the DECLTYPES_BLOCK block.
447   uint64_t DeclsBlockStartOffset = 0;
448 
449   /// The number of declarations in this AST file.
450   unsigned LocalNumDecls = 0;
451 
452   /// Offset of each declaration within the bitstream, indexed
453   /// by the declaration ID (-1).
454   const DeclOffset *DeclOffsets = nullptr;
455 
456   /// Base declaration ID for declarations local to this module.
457   serialization::DeclID BaseDeclID = 0;
458 
459   /// Remapping table for declaration IDs in this module.
460   ContinuousRangeMap<uint32_t, int, 2> DeclRemap;
461 
462   /// Mapping from the module files that this module file depends on
463   /// to the base declaration ID for that module as it is understood within this
464   /// module.
465   ///
466   /// This is effectively a reverse global-to-local mapping for declaration
467   /// IDs, so that we can interpret a true global ID (for this translation unit)
468   /// as a local ID (for this module file).
469   llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs;
470 
471   /// Array of file-level DeclIDs sorted by file.
472   const serialization::DeclID *FileSortedDecls = nullptr;
473   unsigned NumFileSortedDecls = 0;
474 
475   /// Array of category list location information within this
476   /// module file, sorted by the definition ID.
477   const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr;
478 
479   /// The number of redeclaration info entries in ObjCCategoriesMap.
480   unsigned LocalNumObjCCategoriesInMap = 0;
481 
482   /// The Objective-C category lists for categories known to this
483   /// module.
484   SmallVector<uint64_t, 1> ObjCCategories;
485 
486   // === Types ===
487 
488   /// The number of types in this AST file.
489   unsigned LocalNumTypes = 0;
490 
491   /// Offset of each type within the bitstream, indexed by the
492   /// type ID, or the representation of a Type*.
493   const UnderalignedInt64 *TypeOffsets = nullptr;
494 
495   /// Base type ID for types local to this module as represented in
496   /// the global type ID space.
497   serialization::TypeID BaseTypeIndex = 0;
498 
499   /// Remapping table for type IDs in this module.
500   ContinuousRangeMap<uint32_t, int, 2> TypeRemap;
501 
502   // === Miscellaneous ===
503 
504   /// Diagnostic IDs and their mappings that the user changed.
505   SmallVector<uint64_t, 8> PragmaDiagMappings;
506 
507   /// List of modules which depend on this module
508   llvm::SetVector<ModuleFile *> ImportedBy;
509 
510   /// List of modules which this module depends on
511   llvm::SetVector<ModuleFile *> Imports;
512 
513   /// Determine whether this module was directly imported at
514   /// any point during translation.
515   bool isDirectlyImported() const { return DirectlyImported; }
516 
517   /// Is this a module file for a module (rather than a PCH or similar).
518   bool isModule() const {
519     return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule ||
520            Kind == MK_PrebuiltModule;
521   }
522 
523   /// Dump debugging output for this module.
524   void dump();
525 };
526 
527 } // namespace serialization
528 
529 } // namespace clang
530 
531 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H
532