1 //===-ThinLTOCodeGenerator.h - LLVM Link Time Optimizer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file declares the ThinLTOCodeGenerator class, similar to the
// LTOCodeGenerator but for the ThinLTO scheme. It provides an interface for
// linker plugins.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LTO_LEGACY_THINLTOCODEGENERATOR_H
16 #define LLVM_LTO_LEGACY_THINLTOCODEGENERATOR_H
17 
18 #include "llvm-c/lto.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/IR/ModuleSummaryIndex.h"
21 #include "llvm/LTO/LTO.h"
22 #include "llvm/Support/CachePruning.h"
23 #include "llvm/Support/CodeGen.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Target/TargetOptions.h"
26 #include "llvm/TargetParser/Triple.h"
27 
28 #include <string>
29 
30 namespace llvm {
31 class StringRef;
32 class TargetMachine;
33 
34 /// Helper to gather options relevant to the target machine creation
35 struct TargetMachineBuilder {
36   Triple TheTriple;
37   std::string MCpu;
38   std::string MAttr;
39   TargetOptions Options;
40   std::optional<Reloc::Model> RelocModel;
41   CodeGenOpt::Level CGOptLevel = CodeGenOpt::Aggressive;
42 
43   std::unique_ptr<TargetMachine> create() const;
44 };
45 
46 /// This class define an interface similar to the LTOCodeGenerator, but adapted
47 /// for ThinLTO processing.
48 /// The ThinLTOCodeGenerator is not intended to be reuse for multiple
49 /// compilation: the model is that the client adds modules to the generator and
50 /// ask to perform the ThinLTO optimizations / codegen, and finally destroys the
51 /// codegenerator.
52 class ThinLTOCodeGenerator {
53 public:
54   /// Add given module to the code generator.
55   void addModule(StringRef Identifier, StringRef Data);
56 
57   /**
58    * Adds to a list of all global symbols that must exist in the final generated
59    * code. If a symbol is not listed there, it will be optimized away if it is
60    * inlined into every usage.
61    */
62   void preserveSymbol(StringRef Name);
63 
64   /**
65    * Adds to a list of all global symbols that are cross-referenced between
66    * ThinLTO files. If the ThinLTO CodeGenerator can ensure that every
67    * references from a ThinLTO module to this symbol is optimized away, then
68    * the symbol can be discarded.
69    */
70   void crossReferenceSymbol(StringRef Name);
71 
72   /**
73    * Process all the modules that were added to the code generator in parallel.
74    *
75    * Client can access the resulting object files using getProducedBinaries(),
76    * unless setGeneratedObjectsDirectory() has been called, in which case
77    * results are available through getProducedBinaryFiles().
78    */
79   void run();
80 
81   /**
82    * Return the "in memory" binaries produced by the code generator. This is
83    * filled after run() unless setGeneratedObjectsDirectory() has been
84    * called, in which case results are available through
85    * getProducedBinaryFiles().
86    */
87   std::vector<std::unique_ptr<MemoryBuffer>> &getProducedBinaries() {
88     return ProducedBinaries;
89   }
90 
91   /**
92    * Return the "on-disk" binaries produced by the code generator. This is
93    * filled after run() when setGeneratedObjectsDirectory() has been
94    * called, in which case results are available through getProducedBinaries().
95    */
96   std::vector<std::string> &getProducedBinaryFiles() {
97     return ProducedBinaryFiles;
98   }
99 
100   /**
101    * \defgroup Options setters
102    * @{
103    */
104 
105   /**
106    * \defgroup Cache controlling options
107    *
108    * These entry points control the ThinLTO cache. The cache is intended to
109    * support incremental build, and thus needs to be persistent accross build.
110    * The client enabled the cache by supplying a path to an existing directory.
111    * The code generator will use this to store objects files that may be reused
112    * during a subsequent build.
113    * To avoid filling the disk space, a few knobs are provided:
114    *  - The pruning interval limit the frequency at which the garbage collector
115    *    will try to scan the cache directory to prune it from expired entries.
116    *    Setting to -1 disable the pruning (default). Setting to 0 will force
117    *    pruning to occur.
118    *  - The pruning expiration time indicates to the garbage collector how old
119    *    an entry needs to be to be removed.
120    *  - Finally, the garbage collector can be instructed to prune the cache till
121    *    the occupied space goes below a threshold.
122    * @{
123    */
124 
125   struct CachingOptions {
126     std::string Path;                    // Path to the cache, empty to disable.
127     CachePruningPolicy Policy;
128   };
129 
130   /// Provide a path to a directory where to store the cached files for
131   /// incremental build.
132   void setCacheDir(std::string Path) { CacheOptions.Path = std::move(Path); }
133 
134   /// Cache policy: interval (seconds) between two prunes of the cache. Set to a
135   /// negative value to disable pruning. A value of 0 will force pruning to
136   /// occur.
137   void setCachePruningInterval(int Interval) {
138     if(Interval < 0)
139       CacheOptions.Policy.Interval.reset();
140     else
141       CacheOptions.Policy.Interval = std::chrono::seconds(Interval);
142   }
143 
144   /// Cache policy: expiration (in seconds) for an entry.
145   /// A value of 0 will be ignored.
146   void setCacheEntryExpiration(unsigned Expiration) {
147     if (Expiration)
148       CacheOptions.Policy.Expiration = std::chrono::seconds(Expiration);
149   }
150 
151   /**
152    * Sets the maximum cache size that can be persistent across build, in terms
153    * of percentage of the available space on the disk. Set to 100 to indicate
154    * no limit, 50 to indicate that the cache size will not be left over
155    * half the available space. A value over 100 will be reduced to 100, and a
156    * value of 0 will be ignored.
157    *
158    *
159    * The formula looks like:
160    *  AvailableSpace = FreeSpace + ExistingCacheSize
161    *  NewCacheSize = AvailableSpace * P/100
162    *
163    */
164   void setMaxCacheSizeRelativeToAvailableSpace(unsigned Percentage) {
165     if (Percentage)
166       CacheOptions.Policy.MaxSizePercentageOfAvailableSpace = Percentage;
167   }
168 
169   /// Cache policy: the maximum size for the cache directory in bytes. A value
170   /// over the amount of available space on the disk will be reduced to the
171   /// amount of available space. A value of 0 will be ignored.
172   void setCacheMaxSizeBytes(uint64_t MaxSizeBytes) {
173     if (MaxSizeBytes)
174       CacheOptions.Policy.MaxSizeBytes = MaxSizeBytes;
175   }
176 
177   /// Cache policy: the maximum number of files in the cache directory. A value
178   /// of 0 will be ignored.
179   void setCacheMaxSizeFiles(unsigned MaxSizeFiles) {
180     if (MaxSizeFiles)
181       CacheOptions.Policy.MaxSizeFiles = MaxSizeFiles;
182   }
183 
184   /**@}*/
185 
186   /// Set the path to a directory where to save temporaries at various stages of
187   /// the processing.
188   void setSaveTempsDir(std::string Path) { SaveTempsDir = std::move(Path); }
189 
190   /// Set the path to a directory where to save generated object files. This
191   /// path can be used by a linker to request on-disk files instead of in-memory
192   /// buffers. When set, results are available through getProducedBinaryFiles()
193   /// instead of getProducedBinaries().
194   void setGeneratedObjectsDirectory(std::string Path) {
195     SavedObjectsDirectoryPath = std::move(Path);
196   }
197 
198   /// CPU to use to initialize the TargetMachine
199   void setCpu(std::string Cpu) { TMBuilder.MCpu = std::move(Cpu); }
200 
201   /// Subtarget attributes
202   void setAttr(std::string MAttr) { TMBuilder.MAttr = std::move(MAttr); }
203 
204   /// TargetMachine options
205   void setTargetOptions(TargetOptions Options) {
206     TMBuilder.Options = std::move(Options);
207   }
208 
209   /// Enable the Freestanding mode: indicate that the optimizer should not
210   /// assume builtins are present on the target.
211   void setFreestanding(bool Enabled) { Freestanding = Enabled; }
212 
213   /// CodeModel
214   void setCodePICModel(std::optional<Reloc::Model> Model) {
215     TMBuilder.RelocModel = Model;
216   }
217 
218   /// CodeGen optimization level
219   void setCodeGenOptLevel(CodeGenOpt::Level CGOptLevel) {
220     TMBuilder.CGOptLevel = CGOptLevel;
221   }
222 
223   /// IR optimization level: from 0 to 3.
224   void setOptLevel(unsigned NewOptLevel) {
225     OptLevel = (NewOptLevel > 3) ? 3 : NewOptLevel;
226   }
227 
228   /// Enable or disable debug output for the new pass manager.
229   void setDebugPassManager(unsigned Enabled) { DebugPassManager = Enabled; }
230 
231   /// Disable CodeGen, only run the stages till codegen and stop. The output
232   /// will be bitcode.
233   void disableCodeGen(bool Disable) { DisableCodeGen = Disable; }
234 
235   /// Perform CodeGen only: disable all other stages.
236   void setCodeGenOnly(bool CGOnly) { CodeGenOnly = CGOnly; }
237 
238   /**@}*/
239 
240   /**
241    * \defgroup Set of APIs to run individual stages in isolation.
242    * @{
243    */
244 
245   /**
246    * Produce the combined summary index from all the bitcode files:
247    * "thin-link".
248    */
249   std::unique_ptr<ModuleSummaryIndex> linkCombinedIndex();
250 
251   /**
252    * Perform promotion and renaming of exported internal functions,
253    * and additionally resolve weak and linkonce symbols.
254    * Index is updated to reflect linkage changes from weak resolution.
255    */
256   void promote(Module &Module, ModuleSummaryIndex &Index,
257                const lto::InputFile &File);
258 
259   /**
260    * Compute and emit the imported files for module at \p ModulePath.
261    */
262   void emitImports(Module &Module, StringRef OutputName,
263                    ModuleSummaryIndex &Index,
264                    const lto::InputFile &File);
265 
266   /**
267    * Perform cross-module importing for the module identified by
268    * ModuleIdentifier.
269    */
270   void crossModuleImport(Module &Module, ModuleSummaryIndex &Index,
271                          const lto::InputFile &File);
272 
273   /**
274    * Compute the list of summaries needed for importing into module.
275    */
276   void gatherImportedSummariesForModule(
277       Module &Module, ModuleSummaryIndex &Index,
278       std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
279       const lto::InputFile &File);
280 
281   /**
282    * Perform internalization. Index is updated to reflect linkage changes.
283    */
284   void internalize(Module &Module, ModuleSummaryIndex &Index,
285                    const lto::InputFile &File);
286 
287   /**
288    * Perform post-importing ThinLTO optimizations.
289    */
290   void optimize(Module &Module);
291 
292   /**
293    * Write temporary object file to SavedObjectDirectoryPath, write symlink
294    * to Cache directory if needed. Returns the path to the generated file in
295    * SavedObjectsDirectoryPath.
296    */
297   std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
298                                    const MemoryBuffer &OutputBuffer);
299   /**@}*/
300 
301 private:
302   /// Helper factory to build a TargetMachine
303   TargetMachineBuilder TMBuilder;
304 
305   /// Vector holding the in-memory buffer containing the produced binaries, when
306   /// SavedObjectsDirectoryPath isn't set.
307   std::vector<std::unique_ptr<MemoryBuffer>> ProducedBinaries;
308 
309   /// Path to generated files in the supplied SavedObjectsDirectoryPath if any.
310   std::vector<std::string> ProducedBinaryFiles;
311 
312   /// Vector holding the input buffers containing the bitcode modules to
313   /// process.
314   std::vector<std::unique_ptr<lto::InputFile>> Modules;
315 
316   /// Set of symbols that need to be preserved outside of the set of bitcode
317   /// files.
318   StringSet<> PreservedSymbols;
319 
320   /// Set of symbols that are cross-referenced between bitcode files.
321   StringSet<> CrossReferencedSymbols;
322 
323   /// Control the caching behavior.
324   CachingOptions CacheOptions;
325 
326   /// Path to a directory to save the temporary bitcode files.
327   std::string SaveTempsDir;
328 
329   /// Path to a directory to save the generated object files.
330   std::string SavedObjectsDirectoryPath;
331 
332   /// Flag to enable/disable CodeGen. When set to true, the process stops after
333   /// optimizations and a bitcode is produced.
334   bool DisableCodeGen = false;
335 
336   /// Flag to indicate that only the CodeGen will be performed, no cross-module
337   /// importing or optimization.
338   bool CodeGenOnly = false;
339 
340   /// Flag to indicate that the optimizer should not assume builtins are present
341   /// on the target.
342   bool Freestanding = false;
343 
344   /// IR Optimization Level [0-3].
345   unsigned OptLevel = 3;
346 
347   /// Flag to indicate whether debug output should be enabled for the new pass
348   /// manager.
349   bool DebugPassManager = false;
350 };
351 }
352 #endif
353