1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
//  This file defines the ModuleFile class, which describes a module that has
//  been loaded from an AST file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H
15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H
16 
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/Module.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Serialization/ASTBitCodes.h"
21 #include "clang/Serialization/ContinuousRangeMap.h"
22 #include "clang/Serialization/ModuleFileExtension.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/PointerIntPair.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Bitstream/BitstreamReader.h"
30 #include "llvm/Support/Endian.h"
31 #include <cassert>
32 #include <cstdint>
33 #include <memory>
34 #include <string>
35 #include <vector>
36 
37 namespace clang {
38 
39 namespace serialization {
40 
/// Identifies how a loaded AST file is being treated: as one of the module
/// flavours or as one of the PCH flavours.
enum ModuleKind {
  /// A module that was loaded implicitly.
  MK_ImplicitModule = 0,

  /// A module that was loaded explicitly.
  MK_ExplicitModule = 1,

  /// A PCH file that is treated as a PCH file.
  MK_PCH = 2,

  /// A PCH file that is treated as the preamble.
  MK_Preamble = 3,

  /// A PCH file that is treated as the actual main file.
  MK_MainFile = 4,

  /// A module that comes from a prebuilt module path.
  MK_PrebuiltModule = 5
};
61 
62 /// The input file that has been loaded from this AST file, along with
63 /// bools indicating whether this was an overridden buffer or if it was
64 /// out-of-date or not-found.
65 class InputFile {
66   enum {
67     Overridden = 1,
68     OutOfDate = 2,
69     NotFound = 3
70   };
71   llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val;
72 
73 public:
74   InputFile() = default;
75 
76   InputFile(FileEntryRef File, bool isOverridden = false,
77             bool isOutOfDate = false) {
78     assert(!(isOverridden && isOutOfDate) &&
79            "an overridden cannot be out-of-date");
80     unsigned intVal = 0;
81     if (isOverridden)
82       intVal = Overridden;
83     else if (isOutOfDate)
84       intVal = OutOfDate;
85     Val.setPointerAndInt(&File.getMapEntry(), intVal);
86   }
87 
88   static InputFile getNotFound() {
89     InputFile File;
90     File.Val.setInt(NotFound);
91     return File;
92   }
93 
94   OptionalFileEntryRefDegradesToFileEntryPtr getFile() const {
95     if (auto *P = Val.getPointer())
96       return FileEntryRef(*P);
97     return None;
98   }
99   bool isOverridden() const { return Val.getInt() == Overridden; }
100   bool isOutOfDate() const { return Val.getInt() == OutOfDate; }
101   bool isNotFound() const { return Val.getInt() == NotFound; }
102 };
103 
104 /// Information about a module that has been loaded by the ASTReader.
105 ///
106 /// Each instance of the Module class corresponds to a single AST file, which
107 /// may be a precompiled header, precompiled preamble, a module, or an AST file
108 /// of some sort loaded as the main file, all of which are specific formulations
109 /// of the general notion of a "module". A module may depend on any number of
110 /// other modules.
111 class ModuleFile {
112 public:
113   ModuleFile(ModuleKind Kind, unsigned Generation)
114       : Kind(Kind), Generation(Generation) {}
115   ~ModuleFile();
116 
117   // === General information ===
118 
119   /// The index of this module in the list of modules.
120   unsigned Index = 0;
121 
122   /// The type of this module.
123   ModuleKind Kind;
124 
125   /// The file name of the module file.
126   std::string FileName;
127 
128   /// The name of the module.
129   std::string ModuleName;
130 
131   /// The base directory of the module.
132   std::string BaseDirectory;
133 
134   std::string getTimestampFilename() const {
135     return FileName + ".timestamp";
136   }
137 
138   /// The original source file name that was used to build the
139   /// primary AST file, which may have been modified for
140   /// relocatable-pch support.
141   std::string OriginalSourceFileName;
142 
143   /// The actual original source file name that was used to
144   /// build this AST file.
145   std::string ActualOriginalSourceFileName;
146 
147   /// The file ID for the original source file that was used to
148   /// build this AST file.
149   FileID OriginalSourceFileID;
150 
151   /// The directory that the PCH was originally created in. Used to
152   /// allow resolving headers even after headers+PCH was moved to a new path.
153   std::string OriginalDir;
154 
155   std::string ModuleMapPath;
156 
157   /// Whether this precompiled header is a relocatable PCH file.
158   bool RelocatablePCH = false;
159 
160   /// Whether timestamps are included in this module file.
161   bool HasTimestamps = false;
162 
163   /// Whether the top-level module has been read from the AST file.
164   bool DidReadTopLevelSubmodule = false;
165 
166   /// The file entry for the module file.
167   OptionalFileEntryRefDegradesToFileEntryPtr File;
168 
169   /// The signature of the module file, which may be used instead of the size
170   /// and modification time to identify this particular file.
171   ASTFileSignature Signature;
172 
173   /// The signature of the AST block of the module file, this can be used to
174   /// unique module files based on AST contents.
175   ASTFileSignature ASTBlockHash;
176 
177   /// The bit vector denoting usage of each header search entry (true = used).
178   llvm::BitVector SearchPathUsage;
179 
180   /// Whether this module has been directly imported by the
181   /// user.
182   bool DirectlyImported = false;
183 
184   /// The generation of which this module file is a part.
185   unsigned Generation;
186 
187   /// The memory buffer that stores the data associated with
188   /// this AST file, owned by the InMemoryModuleCache.
189   llvm::MemoryBuffer *Buffer;
190 
191   /// The size of this file, in bits.
192   uint64_t SizeInBits = 0;
193 
194   /// The global bit offset (or base) of this module
195   uint64_t GlobalBitOffset = 0;
196 
197   /// The bit offset of the AST block of this module.
198   uint64_t ASTBlockStartOffset = 0;
199 
200   /// The serialized bitstream data for this file.
201   StringRef Data;
202 
203   /// The main bitstream cursor for the main block.
204   llvm::BitstreamCursor Stream;
205 
206   /// The source location where the module was explicitly or implicitly
207   /// imported in the local translation unit.
208   ///
209   /// If module A depends on and imports module B, both modules will have the
210   /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a
211   /// source location inside module A).
212   ///
213   /// WARNING: This is largely useless. It doesn't tell you when a module was
214   /// made visible, just when the first submodule of that module was imported.
215   SourceLocation DirectImportLoc;
216 
217   /// The source location where this module was first imported.
218   SourceLocation ImportLoc;
219 
220   /// The first source location in this module.
221   SourceLocation FirstLoc;
222 
223   /// The list of extension readers that are attached to this module
224   /// file.
225   std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders;
226 
227   /// The module offset map data for this file. If non-empty, the various
228   /// ContinuousRangeMaps described below have not yet been populated.
229   StringRef ModuleOffsetMap;
230 
231   // === Input Files ===
232 
233   /// The cursor to the start of the input-files block.
234   llvm::BitstreamCursor InputFilesCursor;
235 
236   /// Offsets for all of the input file entries in the AST file.
237   const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
238 
239   /// The input files that have been loaded from this AST file.
240   std::vector<InputFile> InputFilesLoaded;
241 
242   // All user input files reside at the index range [0, NumUserInputFiles), and
243   // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()).
244   unsigned NumUserInputFiles = 0;
245 
246   /// If non-zero, specifies the time when we last validated input
247   /// files.  Zero means we never validated them.
248   ///
249   /// The time is specified in seconds since the start of the Epoch.
250   uint64_t InputFilesValidationTimestamp = 0;
251 
252   // === Source Locations ===
253 
254   /// Cursor used to read source location entries.
255   llvm::BitstreamCursor SLocEntryCursor;
256 
257   /// The bit offset to the start of the SOURCE_MANAGER_BLOCK.
258   uint64_t SourceManagerBlockStartOffset = 0;
259 
260   /// The number of source location entries in this AST file.
261   unsigned LocalNumSLocEntries = 0;
262 
263   /// The base ID in the source manager's view of this module.
264   int SLocEntryBaseID = 0;
265 
266   /// The base offset in the source manager's view of this module.
267   SourceLocation::UIntTy SLocEntryBaseOffset = 0;
268 
269   /// Base file offset for the offsets in SLocEntryOffsets. Real file offset
270   /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i].
271   uint64_t SLocEntryOffsetsBase = 0;
272 
273   /// Offsets for all of the source location entries in the
274   /// AST file.
275   const uint32_t *SLocEntryOffsets = nullptr;
276 
277   /// SLocEntries that we're going to preload.
278   SmallVector<uint64_t, 4> PreloadSLocEntries;
279 
280   /// Remapping table for source locations in this module.
281   ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
282       SLocRemap;
283 
284   // === Identifiers ===
285 
286   /// The number of identifiers in this AST file.
287   unsigned LocalNumIdentifiers = 0;
288 
289   /// Offsets into the identifier table data.
290   ///
291   /// This array is indexed by the identifier ID (-1), and provides
292   /// the offset into IdentifierTableData where the string data is
293   /// stored.
294   const uint32_t *IdentifierOffsets = nullptr;
295 
296   /// Base identifier ID for identifiers local to this module.
297   serialization::IdentID BaseIdentifierID = 0;
298 
299   /// Remapping table for identifier IDs in this module.
300   ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap;
301 
302   /// Actual data for the on-disk hash table of identifiers.
303   ///
304   /// This pointer points into a memory buffer, where the on-disk hash
305   /// table for identifiers actually lives.
306   const unsigned char *IdentifierTableData = nullptr;
307 
308   /// A pointer to an on-disk hash table of opaque type
309   /// IdentifierHashTable.
310   void *IdentifierLookupTable = nullptr;
311 
312   /// Offsets of identifiers that we're going to preload within
313   /// IdentifierTableData.
314   std::vector<unsigned> PreloadIdentifierOffsets;
315 
316   // === Macros ===
317 
318   /// The cursor to the start of the preprocessor block, which stores
319   /// all of the macro definitions.
320   llvm::BitstreamCursor MacroCursor;
321 
322   /// The number of macros in this AST file.
323   unsigned LocalNumMacros = 0;
324 
325   /// Base file offset for the offsets in MacroOffsets. Real file offset for
326   /// the entry is MacroOffsetsBase + MacroOffsets[i].
327   uint64_t MacroOffsetsBase = 0;
328 
329   /// Offsets of macros in the preprocessor block.
330   ///
331   /// This array is indexed by the macro ID (-1), and provides
332   /// the offset into the preprocessor block where macro definitions are
333   /// stored.
334   const uint32_t *MacroOffsets = nullptr;
335 
336   /// Base macro ID for macros local to this module.
337   serialization::MacroID BaseMacroID = 0;
338 
339   /// Remapping table for macro IDs in this module.
340   ContinuousRangeMap<uint32_t, int, 2> MacroRemap;
341 
342   /// The offset of the start of the set of defined macros.
343   uint64_t MacroStartOffset = 0;
344 
345   // === Detailed PreprocessingRecord ===
346 
347   /// The cursor to the start of the (optional) detailed preprocessing
348   /// record block.
349   llvm::BitstreamCursor PreprocessorDetailCursor;
350 
351   /// The offset of the start of the preprocessor detail cursor.
352   uint64_t PreprocessorDetailStartOffset = 0;
353 
354   /// Base preprocessed entity ID for preprocessed entities local to
355   /// this module.
356   serialization::PreprocessedEntityID BasePreprocessedEntityID = 0;
357 
358   /// Remapping table for preprocessed entity IDs in this module.
359   ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap;
360 
361   const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
362   unsigned NumPreprocessedEntities = 0;
363 
364   /// Base ID for preprocessed skipped ranges local to this module.
365   unsigned BasePreprocessedSkippedRangeID = 0;
366 
367   const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
368   unsigned NumPreprocessedSkippedRanges = 0;
369 
370   // === Header search information ===
371 
372   /// The number of local HeaderFileInfo structures.
373   unsigned LocalNumHeaderFileInfos = 0;
374 
375   /// Actual data for the on-disk hash table of header file
376   /// information.
377   ///
378   /// This pointer points into a memory buffer, where the on-disk hash
379   /// table for header file information actually lives.
380   const char *HeaderFileInfoTableData = nullptr;
381 
382   /// The on-disk hash table that contains information about each of
383   /// the header files.
384   void *HeaderFileInfoTable = nullptr;
385 
386   // === Submodule information ===
387 
388   /// The number of submodules in this module.
389   unsigned LocalNumSubmodules = 0;
390 
391   /// Base submodule ID for submodules local to this module.
392   serialization::SubmoduleID BaseSubmoduleID = 0;
393 
394   /// Remapping table for submodule IDs in this module.
395   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
396 
397   // === Selectors ===
398 
399   /// The number of selectors new to this file.
400   ///
401   /// This is the number of entries in SelectorOffsets.
402   unsigned LocalNumSelectors = 0;
403 
404   /// Offsets into the selector lookup table's data array
405   /// where each selector resides.
406   const uint32_t *SelectorOffsets = nullptr;
407 
408   /// Base selector ID for selectors local to this module.
409   serialization::SelectorID BaseSelectorID = 0;
410 
411   /// Remapping table for selector IDs in this module.
412   ContinuousRangeMap<uint32_t, int, 2> SelectorRemap;
413 
414   /// A pointer to the character data that comprises the selector table
415   ///
416   /// The SelectorOffsets table refers into this memory.
417   const unsigned char *SelectorLookupTableData = nullptr;
418 
419   /// A pointer to an on-disk hash table of opaque type
420   /// ASTSelectorLookupTable.
421   ///
422   /// This hash table provides the IDs of all selectors, and the associated
423   /// instance and factory methods.
424   void *SelectorLookupTable = nullptr;
425 
426   // === Declarations ===
427 
428   /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block.
429   /// It has read all the abbreviations at the start of the block and is ready
430   /// to jump around with these in context.
431   llvm::BitstreamCursor DeclsCursor;
432 
433   /// The offset to the start of the DECLTYPES_BLOCK block.
434   uint64_t DeclsBlockStartOffset = 0;
435 
436   /// The number of declarations in this AST file.
437   unsigned LocalNumDecls = 0;
438 
439   /// Offset of each declaration within the bitstream, indexed
440   /// by the declaration ID (-1).
441   const DeclOffset *DeclOffsets = nullptr;
442 
443   /// Base declaration ID for declarations local to this module.
444   serialization::DeclID BaseDeclID = 0;
445 
446   /// Remapping table for declaration IDs in this module.
447   ContinuousRangeMap<uint32_t, int, 2> DeclRemap;
448 
449   /// Mapping from the module files that this module file depends on
450   /// to the base declaration ID for that module as it is understood within this
451   /// module.
452   ///
453   /// This is effectively a reverse global-to-local mapping for declaration
454   /// IDs, so that we can interpret a true global ID (for this translation unit)
455   /// as a local ID (for this module file).
456   llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs;
457 
458   /// Array of file-level DeclIDs sorted by file.
459   const serialization::DeclID *FileSortedDecls = nullptr;
460   unsigned NumFileSortedDecls = 0;
461 
462   /// Array of category list location information within this
463   /// module file, sorted by the definition ID.
464   const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr;
465 
466   /// The number of redeclaration info entries in ObjCCategoriesMap.
467   unsigned LocalNumObjCCategoriesInMap = 0;
468 
469   /// The Objective-C category lists for categories known to this
470   /// module.
471   SmallVector<uint64_t, 1> ObjCCategories;
472 
473   // === Types ===
474 
475   /// The number of types in this AST file.
476   unsigned LocalNumTypes = 0;
477 
478   /// Offset of each type within the bitstream, indexed by the
479   /// type ID, or the representation of a Type*.
480   const UnderalignedInt64 *TypeOffsets = nullptr;
481 
482   /// Base type ID for types local to this module as represented in
483   /// the global type ID space.
484   serialization::TypeID BaseTypeIndex = 0;
485 
486   /// Remapping table for type IDs in this module.
487   ContinuousRangeMap<uint32_t, int, 2> TypeRemap;
488 
489   // === Miscellaneous ===
490 
491   /// Diagnostic IDs and their mappings that the user changed.
492   SmallVector<uint64_t, 8> PragmaDiagMappings;
493 
494   /// List of modules which depend on this module
495   llvm::SetVector<ModuleFile *> ImportedBy;
496 
497   /// List of modules which this module depends on
498   llvm::SetVector<ModuleFile *> Imports;
499 
500   /// Determine whether this module was directly imported at
501   /// any point during translation.
502   bool isDirectlyImported() const { return DirectlyImported; }
503 
504   /// Is this a module file for a module (rather than a PCH or similar).
505   bool isModule() const {
506     return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule ||
507            Kind == MK_PrebuiltModule;
508   }
509 
510   /// Dump debugging output for this module.
511   void dump();
512 };
513 
514 } // namespace serialization
515 
516 } // namespace clang
517 
518 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H
519