1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a model runner using Tensorflow C APIs, allowing the
10 // loading of a model from a command line option.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/Analysis/TensorSpec.h"
14 #include "llvm/Config/config.h"
15 #if defined(LLVM_HAVE_TFLITE)
16 
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/Analysis/CallGraph.h"
19 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
20 #include "llvm/Analysis/MLInlineAdvisor.h"
21 #include "llvm/Analysis/ModelUnderTrainingRunner.h"
22 #include "llvm/Analysis/NoInferenceModelRunner.h"
23 #include "llvm/Analysis/Utils/TFUtils.h"
24 #include "llvm/Analysis/Utils/TrainingLogger.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/ManagedStatic.h"
28 
29 #include <vector>
30 #include <optional>
31 
32 using namespace llvm;
33 
34 static cl::opt<std::string> TrainingLog(
35     "training-log", cl::Hidden,
36     cl::desc("Path where the development - mode inlining log is saved."));
37 
38 static cl::opt<std::string> TFModelUnderTrainingPath(
39     "ml-inliner-model-under-training", cl::Hidden,
40     cl::desc(R"(Path to SavedModel from the previous training iteration.
41 The directory is also expected to contain a JSON specification of the
42 outputs expected to be logged, where the first entry must be the
43 inlining decision. The file containing the specification should be
44 called output_spec.json. The expected JSON value is an array of
45 dictionaries. Each dictionary should have 2 keys:
46 
47 - "tensor_spec, followed by the TensorSpec description of the
48 output; and
49 - "logging_name", a string indicating the name to use when
50 logging the output values.
51 
52 Example:
53 [
54   {
55     "logging_name" : "some_name",
56     "tensor_spec" : {
57       "name" : "model_name",
58       "port" : 0,
59       "shape" : [2, 3],
60       "type" : "float"
61       }
62   }
63 ]
64 
65 The first value must always correspond to the decision.)"));
66 
67 static cl::opt<std::string> TFOutputSpecOverride(
68     "ml-inliner-output-spec-override", cl::Hidden,
69     cl::desc("Override the path to the output spec json file. See "
70              "-ml-inliner-model-under-training documentation for the "
71              "specification of that file."));
72 
73 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
74                                          cl::Hidden, cl::init("action_"),
75                                          cl::desc("Prefix for feature names."));
76 
77 namespace {
/// Plain record describing one inlining decision and its outcome. This is the
/// unit consumed by TrainingLogger::logInlineEvent.
struct InlineEvent {
  /// The decision the default policy would have taken.
  int64_t DefaultDecision{0};

  /// The decision that was advised. It equals DefaultDecision when training
  /// off the default policy.
  int64_t AdvisedDecision{0};

  /// The actual outcome. Stays 'false' when inlining failed even though
  /// AdvisedDecision was true; in all other cases it matches AdvisedDecision.
  bool Effect{false};

  /// Size delta attributed to the decision: size_after - size_before.
  int64_t Reward{0};
};
95 
96 /// Collect data we may use for training a model.
97 class TrainingLogger final {
98 public:
99   TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);
100 
101   /// Log one inlining event.
102   void logInlineEvent(const InlineEvent &Event,
103                       const MLModelRunner &ModelRunner);
104 
105 private:
106   StringRef LogFileName;
107   const ModelUnderTrainingRunner *const MUTR;
108   std::unique_ptr<Logger> L;
109   BitVector Effects;
110   /// Set these 2 clearly OOB, to make sure we set them later.
111   size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
112   size_t DecisionPos = std::numeric_limits<size_t>::max();
113 };
114 
115 /// An extension of the MLInlineAdvisor for the 'development' mode, targeting
116 /// the offline training scenario. Note that training happens outside of the
117 /// compiler, this facility is concerned with producing training data ("logs").
118 /// This InlineAdvisor can operate in the following modes:
119 ///
120 /// 1) collect logs for the default policy. This is useful for bootstrapping
121 /// training, which will be considerably faster by starting from a reasonable
122 /// policy.
123 ///
124 /// 2) collect logs for the ML policy, using a model from a previous
125 /// training. Potentially, that model uses internally some small random
126 /// perturbation of its weights, to induce exploration (setting this up is the
127 /// responsibility of the training algorithm). The logs would then be used to
128 /// retrain and improve on this model.
129 ///
130 /// 3) use the provided model, with no logging. This is useful for end to end
131 /// validation - the model, in this case, is a release candidate and shouldn't
132 /// have random perturbations. It is a convenience feature: rather than needing
133 /// to take the release candidate model and compile it in 'release' mode,
134 /// validate it, then potentially discard it, it's easier to just pass the model
135 /// to the compiler, albeit compilation would be slower, as a one-off. Once the
136 /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
137 /// release mode. The expectation is that a well-trained model provides a good
138 /// policy over a sufficiently diverse codebase, over many changes (i.e.
139 /// training happens seldom).
140 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
141 public:
142   DevelopmentModeMLInlineAdvisor(
143       Module &M, ModuleAnalysisManager &MAM,
144       std::unique_ptr<MLModelRunner> ModelRunner,
145       std::function<bool(CallBase &)> GetDefaultAdvice,
146       std::unique_ptr<TrainingLogger> Logger);
147 
148   size_t getTotalSizeEstimate();
149 
150   void updateNativeSizeEstimate(int64_t Change) {
151     *CurrentNativeSize += Change;
152   }
153   void resetNativeSize(Function *F) {
154     PreservedAnalyses PA = PreservedAnalyses::all();
155     PA.abandon<InlineSizeEstimatorAnalysis>();
156     FAM.invalidate(*F, PA);
157   }
158 
159   std::unique_ptr<MLInlineAdvice>
160   getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
161 
162   std::optional<size_t> getNativeSizeEstimate(const Function &F) const;
163 
164 private:
165   bool isLogging() const { return !!Logger; }
166   std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;
167 
168   std::function<bool(CallBase &)> GetDefaultAdvice;
169   const bool IsDoingInference;
170   std::unique_ptr<TrainingLogger> Logger;
171 
172   const std::optional<int32_t> InitialNativeSize;
173   std::optional<int32_t> CurrentNativeSize;
174 };
175 
176 /// A variant of MLInlineAdvice that tracks all non-trivial inlining
177 /// decisions, for training/logging.
178 class LoggingMLInlineAdvice : public MLInlineAdvice {
179 public:
180   LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
181                         OptimizationRemarkEmitter &ORE, bool Recommendation,
182                         TrainingLogger &Logger,
183                         std::optional<size_t> CallerSizeEstimateBefore,
184                         std::optional<size_t> CalleeSizeEstimateBefore,
185                         bool DefaultDecision, bool Mandatory = false)
186       : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
187         CallerSizeEstimateBefore(CallerSizeEstimateBefore),
188         CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
189         DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}
190 
191   virtual ~LoggingMLInlineAdvice() = default;
192 
193 private:
194   DevelopmentModeMLInlineAdvisor *getAdvisor() const {
195     return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
196   }
197   void recordInliningImpl() override {
198     MLInlineAdvice::recordInliningImpl();
199     getAdvisor()->resetNativeSize(Caller);
200     int Reward = std::numeric_limits<int>::max();
201     if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
202         !getAdvisor()->isForcedToStop()) {
203       int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
204                             *CalleeSizeEstimateBefore;
205       Reward = NativeSizeAfter -
206                (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
207       getAdvisor()->updateNativeSizeEstimate(Reward);
208     }
209     log(Reward, /*Success=*/true);
210   }
211 
212   void recordInliningWithCalleeDeletedImpl() override {
213     MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
214     getAdvisor()->resetNativeSize(Caller);
215     if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
216         !getAdvisor()->isForcedToStop()) {
217       int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
218       int Reward = NativeSizeAfter -
219                    (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
220       getAdvisor()->updateNativeSizeEstimate(Reward);
221       log(Reward, /*Success=*/true);
222     } else {
223       log(NoReward, /*Success=*/true);
224     }
225   }
226 
227   void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
228     MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
229     log(NoReward, /*Success=*/false);
230   }
231 
232   void recordUnattemptedInliningImpl() override {
233     MLInlineAdvice::recordUnattemptedInliningImpl();
234     log(NoReward, /*Success=*/false);
235   }
236 
237   void log(int64_t Reward, bool Success) {
238     if (Mandatory)
239       return;
240     InlineEvent Event;
241     Event.AdvisedDecision = isInliningRecommended();
242     Event.DefaultDecision = DefaultDecision;
243     Event.Effect = Success;
244     Event.Reward = Reward;
245     Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
246   }
247 
248   static const int64_t NoReward = 0;
249   TrainingLogger &Logger;
250   const std::optional<size_t> CallerSizeEstimateBefore;
251   const std::optional<size_t> CalleeSizeEstimateBefore;
252   const int64_t DefaultDecision;
253   const int64_t Mandatory;
254 };
255 
256 static const std::vector<TensorSpec> TrainingOnlyFeatures{
257     TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
258     TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
259     TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
260     TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
261 
262 static const std::vector<TensorSpec> getInputFeatures() {
263   std::vector<TensorSpec> InputSpecs;
264   for (size_t I = 0; I < NumberOfFeatures; ++I)
265     InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
266         TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
267   append_range(InputSpecs, TrainingOnlyFeatures);
268   return InputSpecs;
269 }
270 
271 } // namespace
272 
273 TrainingLogger::TrainingLogger(StringRef LogFileName,
274                                const ModelUnderTrainingRunner *MUTR)
275     : LogFileName(LogFileName), MUTR(MUTR) {
276   // The first output is the inlining decision.
277   std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end());
278 
279   if (MUTR)
280     append_range(FT, MUTR->extraOutputsForLoggingSpecs());
281 
282   DefaultDecisionPos = FT.size();
283   FT.push_back(TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}));
284 
285   DecisionPos = FT.size();
286   FT.push_back(TensorSpec::createSpec<int64_t>(DecisionName, {1}));
287   std::error_code EC;
288   auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
289   if (EC)
290     dbgs() << (EC.message() + ":" + TrainingLog);
291 
292   L = std::make_unique<Logger>(
293       std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
294       InlineSizeEstimatorAnalysis::isEvaluatorRequested());
295   L->switchContext("");
296 }
297 
298 /// Log one inlining event.
299 void TrainingLogger::logInlineEvent(const InlineEvent &Event,
300                                     const MLModelRunner &ModelRunner) {
301   L->startObservation();
302   size_t CurrentFeature = 0;
303   for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature)
304     L->logTensorValue(CurrentFeature,
305                       reinterpret_cast<const char *>(
306                           ModelRunner.getTensorUntyped(CurrentFeature)));
307 
308   if (MUTR)
309     for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) {
310       const char *RawData =
311           reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I));
312       L->logTensorValue(CurrentFeature, RawData);
313       ++CurrentFeature;
314     }
315 
316   assert(CurrentFeature == DefaultDecisionPos);
317   L->logTensorValue(DefaultDecisionPos,
318                     reinterpret_cast<const char *>(&Event.DefaultDecision));
319   L->logTensorValue(DecisionPos,
320                     reinterpret_cast<const char *>(&Event.AdvisedDecision));
321   L->endObservation();
322   if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
323     L->logReward(Event.Reward);
324 
325   // For debugging / later use
326   Effects.push_back(Event.Effect);
327 }
328 
329 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
330     Module &M, ModuleAnalysisManager &MAM,
331     std::unique_ptr<MLModelRunner> ModelRunner,
332     std::function<bool(CallBase &)> GetDefaultAdvice,
333     std::unique_ptr<TrainingLogger> Logger)
334     : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
335       GetDefaultAdvice(GetDefaultAdvice),
336       IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
337       Logger(std::move(Logger)),
338       InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
339       CurrentNativeSize(InitialNativeSize) {
340   // We cannot have the case of neither inference nor logging.
341   assert(IsDoingInference || isLogging());
342 }
343 
344 std::optional<size_t>
345 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
346   if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
347     return std::nullopt;
348   auto &R =
349       FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
350   if (!R) {
351     F.getParent()->getContext().emitError(
352         "Native size estimator is not present.");
353     return 0;
354   }
355   return *R;
356 }
357 
358 std::unique_ptr<MLInlineAdvice>
359 DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
360   return std::make_unique<LoggingMLInlineAdvice>(
361       /*Advisor=*/this,
362       /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
363       /*Logger=*/*Logger,
364       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
365       /*CalleeSizeEstimateBefore=*/
366       getNativeSizeEstimate(*CB.getCalledFunction()),
367       /*DefaultDecision=*/true, /*Mandatory*/ true);
368 }
369 
370 std::unique_ptr<MLInlineAdvice>
371 DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
372     CallBase &CB, OptimizationRemarkEmitter &ORE) {
373   if (IsDoingInference && !isLogging())
374     return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
375 
376   bool DefaultAdvice = GetDefaultAdvice(CB);
377   auto Recommendation =
378       IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
379                        : DefaultAdvice;
380   return std::make_unique<LoggingMLInlineAdvice>(
381       /*Advisor=*/this,
382       /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
383       /*Logger=*/*Logger,
384       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
385       /*CalleeSizeEstimateBefore=*/
386       getNativeSizeEstimate(*CB.getCalledFunction()),
387       /*DefaultDecision=*/DefaultAdvice);
388 }
389 
390 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
391   if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
392     return 0;
393   size_t Ret = 0;
394   for (auto &F : M) {
395     if (F.isDeclaration())
396       continue;
397     Ret += *getNativeSizeEstimate(F);
398   }
399   return Ret;
400 }
401 
402 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
403     Module &M, ModuleAnalysisManager &MAM,
404     std::function<bool(CallBase &)> GetDefaultAdvice) {
405   auto &Ctx = M.getContext();
406   std::unique_ptr<MLModelRunner> Runner;
407   if (TFModelUnderTrainingPath.empty())
408     Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
409   else
410     Runner = ModelUnderTrainingRunner::createAndEnsureValid(
411         Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
412         TFOutputSpecOverride);
413   if (!Runner)
414     return nullptr;
415   std::unique_ptr<TrainingLogger> Logger;
416   if (!TrainingLog.empty())
417     Logger = std::make_unique<TrainingLogger>(
418         TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));
419 
420   return std::make_unique<DevelopmentModeMLInlineAdvisor>(
421       M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
422 }
423 #endif // defined(LLVM_HAVE_TFLITE)
424