1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a model runner using TFLite, allowing the
10 // loading of a model from a command line option.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/Analysis/TensorSpec.h"
14 #include "llvm/Config/config.h"
15 #if defined(LLVM_HAVE_TFLITE)
16 
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/Analysis/CallGraph.h"
19 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
20 #include "llvm/Analysis/MLInlineAdvisor.h"
21 #include "llvm/Analysis/ModelUnderTrainingRunner.h"
22 #include "llvm/Analysis/NoInferenceModelRunner.h"
23 #include "llvm/Analysis/Utils/TFUtils.h"
24 #include "llvm/Analysis/Utils/TrainingLogger.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/ManagedStatic.h"
28 
29 #include <vector>
30 #include <optional>
31 
32 using namespace llvm;
33 
34 static cl::opt<std::string> TrainingLog(
35     "training-log", cl::Hidden,
36     cl::desc("Path where the development - mode inlining log is saved."));
37 
38 static cl::opt<std::string> TFModelUnderTrainingPath(
39     "ml-inliner-model-under-training", cl::Hidden,
40     cl::desc(R"(Path to SavedModel from the previous training iteration.
41 The directory is also expected to contain a JSON specification of the
42 outputs expected to be logged, where the first entry must be the
43 inlining decision. The file containing the specification should be
44 called output_spec.json. The expected JSON value is an array of
45 dictionaries. Each dictionary should have 2 keys:
46 
47 - "tensor_spec, followed by the TensorSpec description of the
48 output; and
49 - "logging_name", a string indicating the name to use when
50 logging the output values.
51 
52 Example:
53 [
54   {
55     "logging_name" : "some_name",
56     "tensor_spec" : {
57       "name" : "model_name",
58       "port" : 0,
59       "shape" : [2, 3],
60       "type" : "float"
61       }
62   }
63 ]
64 
65 The first value must always correspond to the decision.)"));
66 
67 static cl::opt<std::string> TFOutputSpecOverride(
68     "ml-inliner-output-spec-override", cl::Hidden,
69     cl::desc("Override the path to the output spec json file. See "
70              "-ml-inliner-model-under-training documentation for the "
71              "specification of that file."));
72 
73 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
74                                          cl::Hidden, cl::init("action_"),
75                                          cl::desc("Prefix for feature names."));
76 
77 namespace {
/// A single inlining decision together with its outcome, in the form consumed
/// by TrainingLogger.
struct InlineEvent {
  /// The decision the default policy would have taken.
  int64_t DefaultDecision{0};

  /// The decision that was advised. It equals DefaultDecision when training
  /// off the default policy.
  int64_t AdvisedDecision{0};

  /// Whether inlining really took place. It matches AdvisedDecision, except
  /// that an inlining error leaves it 'false' even when inlining was advised.
  bool Effect{false};

  /// The size delta caused by the decision: size_after - size_before.
  int64_t Reward{0};
};
95 
96 /// Collect data we may use for training a model.
97 class TrainingLogger final {
98 public:
99   TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);
100 
101   /// Log one inlining event.
102   void logInlineEvent(const InlineEvent &Event,
103                       const MLModelRunner &ModelRunner);
104 
105 private:
106   StringRef LogFileName;
107   const ModelUnderTrainingRunner *const MUTR;
108   std::unique_ptr<Logger> L;
109   BitVector Effects;
110   /// Set these 2 clearly OOB, to make sure we set them later.
111   size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
112   size_t DecisionPos = std::numeric_limits<size_t>::max();
113 };
114 
115 /// An extension of the MLInlineAdvisor for the 'development' mode, targeting
116 /// the offline training scenario. Note that training happens outside of the
117 /// compiler, this facility is concerned with producing training data ("logs").
118 /// This InlineAdvisor can operate in the following modes:
119 ///
120 /// 1) collect logs for the default policy. This is useful for bootstrapping
121 /// training, which will be considerably faster by starting from a reasonable
122 /// policy.
123 ///
124 /// 2) collect logs for the ML policy, using a model from a previous
125 /// training. Potentially, that model uses internally some small random
126 /// perturbation of its weights, to induce exploration (setting this up is the
127 /// responsibility of the training algorithm). The logs would then be used to
128 /// retrain and improve on this model.
129 ///
130 /// 3) use the provided model, with no logging. This is useful for end to end
131 /// validation - the model, in this case, is a release candidate and shouldn't
132 /// have random perturbations. It is a convenience feature: rather than needing
133 /// to take the release candidate model and compile it in 'release' mode,
134 /// validate it, then potentially discard it, it's easier to just pass the model
135 /// to the compiler, albeit compilation would be slower, as a one-off. Once the
136 /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
137 /// release mode. The expectation is that a well-trained model provides a good
138 /// policy over a sufficiently diverse codebase, over many changes (i.e.
139 /// training happens seldom).
140 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
141 public:
142   DevelopmentModeMLInlineAdvisor(
143       Module &M, ModuleAnalysisManager &MAM,
144       std::unique_ptr<MLModelRunner> ModelRunner,
145       std::function<bool(CallBase &)> GetDefaultAdvice,
146       std::unique_ptr<TrainingLogger> Logger);
147 
148   size_t getTotalSizeEstimate();
149 
150   void updateNativeSizeEstimate(int64_t Change) {
151     *CurrentNativeSize += Change;
152   }
153   void resetNativeSize(Function *F) {
154     PreservedAnalyses PA = PreservedAnalyses::all();
155     PA.abandon<InlineSizeEstimatorAnalysis>();
156     FAM.invalidate(*F, PA);
157   }
158 
159   std::unique_ptr<MLInlineAdvice>
160   getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
161 
162   std::optional<size_t> getNativeSizeEstimate(const Function &F) const;
163 
164 private:
165   bool isLogging() const { return !!Logger; }
166   std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;
167 
168   const bool IsDoingInference;
169   std::unique_ptr<TrainingLogger> Logger;
170 
171   const std::optional<int32_t> InitialNativeSize;
172   std::optional<int32_t> CurrentNativeSize;
173 };
174 
175 /// A variant of MLInlineAdvice that tracks all non-trivial inlining
176 /// decisions, for training/logging.
177 class LoggingMLInlineAdvice : public MLInlineAdvice {
178 public:
179   LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
180                         OptimizationRemarkEmitter &ORE, bool Recommendation,
181                         TrainingLogger &Logger,
182                         std::optional<size_t> CallerSizeEstimateBefore,
183                         std::optional<size_t> CalleeSizeEstimateBefore,
184                         bool DefaultDecision, bool Mandatory = false)
185       : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
186         CallerSizeEstimateBefore(CallerSizeEstimateBefore),
187         CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
188         DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}
189 
190   virtual ~LoggingMLInlineAdvice() = default;
191 
192 private:
193   DevelopmentModeMLInlineAdvisor *getAdvisor() const {
194     return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
195   }
196   void recordInliningImpl() override {
197     MLInlineAdvice::recordInliningImpl();
198     getAdvisor()->resetNativeSize(Caller);
199     int Reward = std::numeric_limits<int>::max();
200     if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
201         !getAdvisor()->isForcedToStop()) {
202       int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
203                             *CalleeSizeEstimateBefore;
204       Reward = NativeSizeAfter -
205                (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
206       getAdvisor()->updateNativeSizeEstimate(Reward);
207     }
208     log(Reward, /*Success=*/true);
209   }
210 
211   void recordInliningWithCalleeDeletedImpl() override {
212     MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
213     getAdvisor()->resetNativeSize(Caller);
214     if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
215         !getAdvisor()->isForcedToStop()) {
216       int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
217       int Reward = NativeSizeAfter -
218                    (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
219       getAdvisor()->updateNativeSizeEstimate(Reward);
220       log(Reward, /*Success=*/true);
221     } else {
222       log(NoReward, /*Success=*/true);
223     }
224   }
225 
226   void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
227     MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
228     log(NoReward, /*Success=*/false);
229   }
230 
231   void recordUnattemptedInliningImpl() override {
232     MLInlineAdvice::recordUnattemptedInliningImpl();
233     log(NoReward, /*Success=*/false);
234   }
235 
236   void log(int64_t Reward, bool Success) {
237     if (Mandatory)
238       return;
239     InlineEvent Event;
240     Event.AdvisedDecision = isInliningRecommended();
241     Event.DefaultDecision = DefaultDecision;
242     Event.Effect = Success;
243     Event.Reward = Reward;
244     Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
245   }
246 
247   static const int64_t NoReward = 0;
248   TrainingLogger &Logger;
249   const std::optional<size_t> CallerSizeEstimateBefore;
250   const std::optional<size_t> CalleeSizeEstimateBefore;
251   const int64_t DefaultDecision;
252   const int64_t Mandatory;
253 };
254 
255 static const std::vector<TensorSpec> TrainingOnlyFeatures{
256     TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
257     TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
258     TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
259     TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
260 
261 static const std::vector<TensorSpec> getInputFeatures() {
262   std::vector<TensorSpec> InputSpecs;
263   for (size_t I = 0; I < NumberOfFeatures; ++I)
264     InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
265         TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
266   append_range(InputSpecs, TrainingOnlyFeatures);
267   return InputSpecs;
268 }
269 
270 } // namespace
271 
272 TrainingLogger::TrainingLogger(StringRef LogFileName,
273                                const ModelUnderTrainingRunner *MUTR)
274     : LogFileName(LogFileName), MUTR(MUTR) {
275   // The first output is the inlining decision.
276   std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end());
277 
278   if (MUTR)
279     append_range(FT, MUTR->extraOutputsForLoggingSpecs());
280 
281   DefaultDecisionPos = FT.size();
282   FT.push_back(DefaultDecisionSpec);
283 
284   DecisionPos = FT.size();
285   FT.push_back(InlineDecisionSpec);
286   std::error_code EC;
287   auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
288   if (EC)
289     dbgs() << (EC.message() + ":" + TrainingLog);
290 
291   L = std::make_unique<Logger>(
292       std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
293       InlineSizeEstimatorAnalysis::isEvaluatorRequested());
294   L->switchContext("");
295 }
296 
297 /// Log one inlining event.
298 void TrainingLogger::logInlineEvent(const InlineEvent &Event,
299                                     const MLModelRunner &ModelRunner) {
300   L->startObservation();
301   size_t CurrentFeature = 0;
302   for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature)
303     L->logTensorValue(CurrentFeature,
304                       reinterpret_cast<const char *>(
305                           ModelRunner.getTensorUntyped(CurrentFeature)));
306 
307   if (MUTR)
308     for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) {
309       const char *RawData =
310           reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I));
311       L->logTensorValue(CurrentFeature, RawData);
312       ++CurrentFeature;
313     }
314 
315   assert(CurrentFeature == DefaultDecisionPos);
316   L->logTensorValue(DefaultDecisionPos,
317                     reinterpret_cast<const char *>(&Event.DefaultDecision));
318   L->logTensorValue(DecisionPos,
319                     reinterpret_cast<const char *>(&Event.AdvisedDecision));
320   L->endObservation();
321   if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
322     L->logReward(Event.Reward);
323 
324   // For debugging / later use
325   Effects.push_back(Event.Effect);
326 }
327 
328 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
329     Module &M, ModuleAnalysisManager &MAM,
330     std::unique_ptr<MLModelRunner> ModelRunner,
331     std::function<bool(CallBase &)> GetDefaultAdvice,
332     std::unique_ptr<TrainingLogger> Logger)
333     : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice),
334       IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
335       Logger(std::move(Logger)),
336       InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
337       CurrentNativeSize(InitialNativeSize) {
338   // We cannot have the case of neither inference nor logging.
339   assert(IsDoingInference || isLogging());
340 }
341 
342 std::optional<size_t>
343 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
344   if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
345     return std::nullopt;
346   auto &R =
347       FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
348   if (!R) {
349     F.getParent()->getContext().emitError(
350         "Native size estimator is not present.");
351     return 0;
352   }
353   return *R;
354 }
355 
356 std::unique_ptr<MLInlineAdvice>
357 DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
358   return std::make_unique<LoggingMLInlineAdvice>(
359       /*Advisor=*/this,
360       /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
361       /*Logger=*/*Logger,
362       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
363       /*CalleeSizeEstimateBefore=*/
364       getNativeSizeEstimate(*CB.getCalledFunction()),
365       /*DefaultDecision=*/true, /*Mandatory*/ true);
366 }
367 
368 std::unique_ptr<MLInlineAdvice>
369 DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
370     CallBase &CB, OptimizationRemarkEmitter &ORE) {
371   if (IsDoingInference && !isLogging())
372     return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
373 
374   bool DefaultAdvice = GetDefaultAdvice(CB);
375   auto Recommendation =
376       IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
377                        : DefaultAdvice;
378   return std::make_unique<LoggingMLInlineAdvice>(
379       /*Advisor=*/this,
380       /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
381       /*Logger=*/*Logger,
382       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
383       /*CalleeSizeEstimateBefore=*/
384       getNativeSizeEstimate(*CB.getCalledFunction()),
385       /*DefaultDecision=*/DefaultAdvice);
386 }
387 
388 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
389   if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
390     return 0;
391   size_t Ret = 0;
392   for (auto &F : M) {
393     if (F.isDeclaration())
394       continue;
395     Ret += *getNativeSizeEstimate(F);
396   }
397   return Ret;
398 }
399 
400 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
401     Module &M, ModuleAnalysisManager &MAM,
402     std::function<bool(CallBase &)> GetDefaultAdvice) {
403   auto &Ctx = M.getContext();
404   std::unique_ptr<MLModelRunner> Runner;
405   if (TFModelUnderTrainingPath.empty())
406     Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
407   else
408     Runner = ModelUnderTrainingRunner::createAndEnsureValid(
409         Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
410         TFOutputSpecOverride);
411   if (!Runner)
412     return nullptr;
413   std::unique_ptr<TrainingLogger> Logger;
414   if (!TrainingLog.empty())
415     Logger = std::make_unique<TrainingLogger>(
416         TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));
417 
418   return std::make_unique<DevelopmentModeMLInlineAdvisor>(
419       M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
420 }
421 #endif // defined(LLVM_HAVE_TFLITE)
422