1 //===- Module.h - Module description ----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
//  This file defines the ModuleFile class, which describes a module that has
//  been loaded from an AST file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_SERIALIZATION_MODULE_H
15 #define LLVM_CLANG_SERIALIZATION_MODULE_H
16 
17 #include "clang/Basic/Module.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Serialization/ASTBitCodes.h"
20 #include "clang/Serialization/ContinuousRangeMap.h"
21 #include "clang/Serialization/ModuleFileExtension.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/PointerIntPair.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Bitstream/BitstreamReader.h"
28 #include "llvm/Support/Endian.h"
29 #include <cassert>
30 #include <cstdint>
31 #include <memory>
32 #include <string>
33 #include <vector>
34 
35 namespace clang {
36 
37 class FileEntry;
38 
39 namespace serialization {
40 
/// Identifies what kind of AST file a loaded module is and how it is being
/// treated.
enum ModuleKind {
  /// A module file that was loaded implicitly.
  MK_ImplicitModule = 0,

  /// A module file that was loaded explicitly.
  MK_ExplicitModule = 1,

  /// A PCH file that is treated as a PCH.
  MK_PCH = 2,

  /// A PCH file that is treated as the preamble.
  MK_Preamble = 3,

  /// A PCH file that is treated as the actual main file.
  MK_MainFile = 4,

  /// A module file that comes from a prebuilt module path.
  MK_PrebuiltModule = 5
};
61 
62 /// The input file that has been loaded from this AST file, along with
63 /// bools indicating whether this was an overridden buffer or if it was
64 /// out-of-date or not-found.
65 class InputFile {
66   enum {
67     Overridden = 1,
68     OutOfDate = 2,
69     NotFound = 3
70   };
71   llvm::PointerIntPair<const FileEntry *, 2, unsigned> Val;
72 
73 public:
74   InputFile() = default;
75 
76   InputFile(const FileEntry *File,
77             bool isOverridden = false, bool isOutOfDate = false) {
78     assert(!(isOverridden && isOutOfDate) &&
79            "an overridden cannot be out-of-date");
80     unsigned intVal = 0;
81     if (isOverridden)
82       intVal = Overridden;
83     else if (isOutOfDate)
84       intVal = OutOfDate;
85     Val.setPointerAndInt(File, intVal);
86   }
87 
getNotFound()88   static InputFile getNotFound() {
89     InputFile File;
90     File.Val.setInt(NotFound);
91     return File;
92   }
93 
getFile()94   const FileEntry *getFile() const { return Val.getPointer(); }
isOverridden()95   bool isOverridden() const { return Val.getInt() == Overridden; }
isOutOfDate()96   bool isOutOfDate() const { return Val.getInt() == OutOfDate; }
isNotFound()97   bool isNotFound() const { return Val.getInt() == NotFound; }
98 };
99 
100 /// Information about a module that has been loaded by the ASTReader.
101 ///
102 /// Each instance of the Module class corresponds to a single AST file, which
103 /// may be a precompiled header, precompiled preamble, a module, or an AST file
104 /// of some sort loaded as the main file, all of which are specific formulations
105 /// of the general notion of a "module". A module may depend on any number of
106 /// other modules.
107 class ModuleFile {
108 public:
ModuleFile(ModuleKind Kind,unsigned Generation)109   ModuleFile(ModuleKind Kind, unsigned Generation)
110       : Kind(Kind), Generation(Generation) {}
111   ~ModuleFile();
112 
113   // === General information ===
114 
115   /// The index of this module in the list of modules.
116   unsigned Index = 0;
117 
118   /// The type of this module.
119   ModuleKind Kind;
120 
121   /// The file name of the module file.
122   std::string FileName;
123 
124   /// The name of the module.
125   std::string ModuleName;
126 
127   /// The base directory of the module.
128   std::string BaseDirectory;
129 
getTimestampFilename()130   std::string getTimestampFilename() const {
131     return FileName + ".timestamp";
132   }
133 
134   /// The original source file name that was used to build the
135   /// primary AST file, which may have been modified for
136   /// relocatable-pch support.
137   std::string OriginalSourceFileName;
138 
139   /// The actual original source file name that was used to
140   /// build this AST file.
141   std::string ActualOriginalSourceFileName;
142 
143   /// The file ID for the original source file that was used to
144   /// build this AST file.
145   FileID OriginalSourceFileID;
146 
147   /// The directory that the PCH was originally created in. Used to
148   /// allow resolving headers even after headers+PCH was moved to a new path.
149   std::string OriginalDir;
150 
151   std::string ModuleMapPath;
152 
153   /// Whether this precompiled header is a relocatable PCH file.
154   bool RelocatablePCH = false;
155 
156   /// Whether timestamps are included in this module file.
157   bool HasTimestamps = false;
158 
159   /// Whether the PCH has a corresponding object file.
160   bool PCHHasObjectFile = false;
161 
162   /// The file entry for the module file.
163   const FileEntry *File = nullptr;
164 
165   /// The signature of the module file, which may be used instead of the size
166   /// and modification time to identify this particular file.
167   ASTFileSignature Signature;
168 
169   /// Whether this module has been directly imported by the
170   /// user.
171   bool DirectlyImported = false;
172 
173   /// The generation of which this module file is a part.
174   unsigned Generation;
175 
176   /// The memory buffer that stores the data associated with
177   /// this AST file, owned by the InMemoryModuleCache.
178   llvm::MemoryBuffer *Buffer;
179 
180   /// The size of this file, in bits.
181   uint64_t SizeInBits = 0;
182 
183   /// The global bit offset (or base) of this module
184   uint64_t GlobalBitOffset = 0;
185 
186   /// The serialized bitstream data for this file.
187   StringRef Data;
188 
189   /// The main bitstream cursor for the main block.
190   llvm::BitstreamCursor Stream;
191 
192   /// The source location where the module was explicitly or implicitly
193   /// imported in the local translation unit.
194   ///
195   /// If module A depends on and imports module B, both modules will have the
196   /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a
197   /// source location inside module A).
198   ///
199   /// WARNING: This is largely useless. It doesn't tell you when a module was
200   /// made visible, just when the first submodule of that module was imported.
201   SourceLocation DirectImportLoc;
202 
203   /// The source location where this module was first imported.
204   SourceLocation ImportLoc;
205 
206   /// The first source location in this module.
207   SourceLocation FirstLoc;
208 
209   /// The list of extension readers that are attached to this module
210   /// file.
211   std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders;
212 
213   /// The module offset map data for this file. If non-empty, the various
214   /// ContinuousRangeMaps described below have not yet been populated.
215   StringRef ModuleOffsetMap;
216 
217   // === Input Files ===
218 
219   /// The cursor to the start of the input-files block.
220   llvm::BitstreamCursor InputFilesCursor;
221 
222   /// Offsets for all of the input file entries in the AST file.
223   const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
224 
225   /// The input files that have been loaded from this AST file.
226   std::vector<InputFile> InputFilesLoaded;
227 
228   // All user input files reside at the index range [0, NumUserInputFiles), and
229   // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()).
230   unsigned NumUserInputFiles = 0;
231 
232   /// If non-zero, specifies the time when we last validated input
233   /// files.  Zero means we never validated them.
234   ///
235   /// The time is specified in seconds since the start of the Epoch.
236   uint64_t InputFilesValidationTimestamp = 0;
237 
238   // === Source Locations ===
239 
240   /// Cursor used to read source location entries.
241   llvm::BitstreamCursor SLocEntryCursor;
242 
243   /// The number of source location entries in this AST file.
244   unsigned LocalNumSLocEntries = 0;
245 
246   /// The base ID in the source manager's view of this module.
247   int SLocEntryBaseID = 0;
248 
249   /// The base offset in the source manager's view of this module.
250   unsigned SLocEntryBaseOffset = 0;
251 
252   /// Offsets for all of the source location entries in the
253   /// AST file.
254   const uint32_t *SLocEntryOffsets = nullptr;
255 
256   /// SLocEntries that we're going to preload.
257   SmallVector<uint64_t, 4> PreloadSLocEntries;
258 
259   /// Remapping table for source locations in this module.
260   ContinuousRangeMap<uint32_t, int, 2> SLocRemap;
261 
262   // === Identifiers ===
263 
264   /// The number of identifiers in this AST file.
265   unsigned LocalNumIdentifiers = 0;
266 
267   /// Offsets into the identifier table data.
268   ///
269   /// This array is indexed by the identifier ID (-1), and provides
270   /// the offset into IdentifierTableData where the string data is
271   /// stored.
272   const uint32_t *IdentifierOffsets = nullptr;
273 
274   /// Base identifier ID for identifiers local to this module.
275   serialization::IdentID BaseIdentifierID = 0;
276 
277   /// Remapping table for identifier IDs in this module.
278   ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap;
279 
280   /// Actual data for the on-disk hash table of identifiers.
281   ///
282   /// This pointer points into a memory buffer, where the on-disk hash
283   /// table for identifiers actually lives.
284   const char *IdentifierTableData = nullptr;
285 
286   /// A pointer to an on-disk hash table of opaque type
287   /// IdentifierHashTable.
288   void *IdentifierLookupTable = nullptr;
289 
290   /// Offsets of identifiers that we're going to preload within
291   /// IdentifierTableData.
292   std::vector<unsigned> PreloadIdentifierOffsets;
293 
294   // === Macros ===
295 
296   /// The cursor to the start of the preprocessor block, which stores
297   /// all of the macro definitions.
298   llvm::BitstreamCursor MacroCursor;
299 
300   /// The number of macros in this AST file.
301   unsigned LocalNumMacros = 0;
302 
303   /// Offsets of macros in the preprocessor block.
304   ///
305   /// This array is indexed by the macro ID (-1), and provides
306   /// the offset into the preprocessor block where macro definitions are
307   /// stored.
308   const uint32_t *MacroOffsets = nullptr;
309 
310   /// Base macro ID for macros local to this module.
311   serialization::MacroID BaseMacroID = 0;
312 
313   /// Remapping table for macro IDs in this module.
314   ContinuousRangeMap<uint32_t, int, 2> MacroRemap;
315 
316   /// The offset of the start of the set of defined macros.
317   uint64_t MacroStartOffset = 0;
318 
319   // === Detailed PreprocessingRecord ===
320 
321   /// The cursor to the start of the (optional) detailed preprocessing
322   /// record block.
323   llvm::BitstreamCursor PreprocessorDetailCursor;
324 
325   /// The offset of the start of the preprocessor detail cursor.
326   uint64_t PreprocessorDetailStartOffset = 0;
327 
328   /// Base preprocessed entity ID for preprocessed entities local to
329   /// this module.
330   serialization::PreprocessedEntityID BasePreprocessedEntityID = 0;
331 
332   /// Remapping table for preprocessed entity IDs in this module.
333   ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap;
334 
335   const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
336   unsigned NumPreprocessedEntities = 0;
337 
338   /// Base ID for preprocessed skipped ranges local to this module.
339   unsigned BasePreprocessedSkippedRangeID = 0;
340 
341   const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
342   unsigned NumPreprocessedSkippedRanges = 0;
343 
344   // === Header search information ===
345 
346   /// The number of local HeaderFileInfo structures.
347   unsigned LocalNumHeaderFileInfos = 0;
348 
349   /// Actual data for the on-disk hash table of header file
350   /// information.
351   ///
352   /// This pointer points into a memory buffer, where the on-disk hash
353   /// table for header file information actually lives.
354   const char *HeaderFileInfoTableData = nullptr;
355 
356   /// The on-disk hash table that contains information about each of
357   /// the header files.
358   void *HeaderFileInfoTable = nullptr;
359 
360   // === Submodule information ===
361 
362   /// The number of submodules in this module.
363   unsigned LocalNumSubmodules = 0;
364 
365   /// Base submodule ID for submodules local to this module.
366   serialization::SubmoduleID BaseSubmoduleID = 0;
367 
368   /// Remapping table for submodule IDs in this module.
369   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
370 
371   // === Selectors ===
372 
373   /// The number of selectors new to this file.
374   ///
375   /// This is the number of entries in SelectorOffsets.
376   unsigned LocalNumSelectors = 0;
377 
378   /// Offsets into the selector lookup table's data array
379   /// where each selector resides.
380   const uint32_t *SelectorOffsets = nullptr;
381 
382   /// Base selector ID for selectors local to this module.
383   serialization::SelectorID BaseSelectorID = 0;
384 
385   /// Remapping table for selector IDs in this module.
386   ContinuousRangeMap<uint32_t, int, 2> SelectorRemap;
387 
388   /// A pointer to the character data that comprises the selector table
389   ///
390   /// The SelectorOffsets table refers into this memory.
391   const unsigned char *SelectorLookupTableData = nullptr;
392 
393   /// A pointer to an on-disk hash table of opaque type
394   /// ASTSelectorLookupTable.
395   ///
396   /// This hash table provides the IDs of all selectors, and the associated
397   /// instance and factory methods.
398   void *SelectorLookupTable = nullptr;
399 
400   // === Declarations ===
401 
402   /// DeclsCursor - This is a cursor to the start of the DECLS_BLOCK block. It
403   /// has read all the abbreviations at the start of the block and is ready to
404   /// jump around with these in context.
405   llvm::BitstreamCursor DeclsCursor;
406 
407   /// The number of declarations in this AST file.
408   unsigned LocalNumDecls = 0;
409 
410   /// Offset of each declaration within the bitstream, indexed
411   /// by the declaration ID (-1).
412   const DeclOffset *DeclOffsets = nullptr;
413 
414   /// Base declaration ID for declarations local to this module.
415   serialization::DeclID BaseDeclID = 0;
416 
417   /// Remapping table for declaration IDs in this module.
418   ContinuousRangeMap<uint32_t, int, 2> DeclRemap;
419 
420   /// Mapping from the module files that this module file depends on
421   /// to the base declaration ID for that module as it is understood within this
422   /// module.
423   ///
424   /// This is effectively a reverse global-to-local mapping for declaration
425   /// IDs, so that we can interpret a true global ID (for this translation unit)
426   /// as a local ID (for this module file).
427   llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs;
428 
429   /// Array of file-level DeclIDs sorted by file.
430   const serialization::DeclID *FileSortedDecls = nullptr;
431   unsigned NumFileSortedDecls = 0;
432 
433   /// Array of category list location information within this
434   /// module file, sorted by the definition ID.
435   const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr;
436 
437   /// The number of redeclaration info entries in ObjCCategoriesMap.
438   unsigned LocalNumObjCCategoriesInMap = 0;
439 
440   /// The Objective-C category lists for categories known to this
441   /// module.
442   SmallVector<uint64_t, 1> ObjCCategories;
443 
444   // === Types ===
445 
446   /// The number of types in this AST file.
447   unsigned LocalNumTypes = 0;
448 
449   /// Offset of each type within the bitstream, indexed by the
450   /// type ID, or the representation of a Type*.
451   const uint32_t *TypeOffsets = nullptr;
452 
453   /// Base type ID for types local to this module as represented in
454   /// the global type ID space.
455   serialization::TypeID BaseTypeIndex = 0;
456 
457   /// Remapping table for type IDs in this module.
458   ContinuousRangeMap<uint32_t, int, 2> TypeRemap;
459 
460   // === Miscellaneous ===
461 
462   /// Diagnostic IDs and their mappings that the user changed.
463   SmallVector<uint64_t, 8> PragmaDiagMappings;
464 
465   /// List of modules which depend on this module
466   llvm::SetVector<ModuleFile *> ImportedBy;
467 
468   /// List of modules which this module depends on
469   llvm::SetVector<ModuleFile *> Imports;
470 
471   /// Determine whether this module was directly imported at
472   /// any point during translation.
isDirectlyImported()473   bool isDirectlyImported() const { return DirectlyImported; }
474 
475   /// Is this a module file for a module (rather than a PCH or similar).
isModule()476   bool isModule() const {
477     return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule ||
478            Kind == MK_PrebuiltModule;
479   }
480 
481   /// Dump debugging output for this module.
482   void dump();
483 };
484 
485 } // namespace serialization
486 
487 } // namespace clang
488 
489 #endif // LLVM_CLANG_SERIALIZATION_MODULE_H
490