//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using Tensorflow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

41 - "tensor_spec, followed by the TensorSpec description of the
output; and
- "logging_name", a string indicating the name to use when
logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
      }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));
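
// Usage sketch (illustrative, not taken from this file): collecting a
// development-mode log for the default policy might look roughly like
//   opt -passes=scc-oz-module-inliner -enable-ml-inliner=development \
//     -training-log=/tmp/inline.log input.ll -o /dev/null
// with -ml-inliner-model-under-training=<SavedModel dir> added to log the
// decisions of a model under training instead. The advisor-selection flag is
// defined elsewhere and its exact spelling may differ across LLVM versions.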

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true, otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// Tensorflow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
class ModelUnderTrainingRunner;
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  std::vector<bool> Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        Optional<size_t> CallerSizeEstimateBefore,
                        Optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
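      // The callee still exists after this inlining, so the post-inlining size
      // is the re-estimated caller plus the callee's pre-inlining estimate;
      // the reward is the delta versus the combined pre-inlining estimates.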
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
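      // The callee was deleted by this inlining, so only the caller's
      // re-estimated size contributes to the post-inlining estimate.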
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const Optional<size_t> CallerSizeEstimateBefore;
  const Optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

  const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const {
    return OutputSpecs;
  }

  const Optional<TFModelEvaluator::EvaluationResult> &
  lastEvaluationResult() const {
    return LastEvaluationResult;
  }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;
  std::vector<LoggedFeatureSpec> OutputSpecs;
  Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;

  // The training framework needs some additional features.
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  if (MUTR)
    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
  std::vector<LoggedFeatureSpec> FT;

  for (size_t I = 0; I < NumberOfFeatures; ++I)
    FT.push_back(
        {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
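  // If a model under training asked for extra outputs to be logged, reserve
  // columns for them after the features; the first output (the decision
  // itself) is logged separately at DecisionPos.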
  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
    append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));

  DefaultDecisionPos = FT.size();
  FT.push_back(
      {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});

  DecisionPos = FT.size();
  FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});

  L = std::make_unique<Logger>(
      FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
    int64_t F = ModelRunner.getFeature(CurrentFeature);
    L->logInt64Value(CurrentFeature, &F);
  }

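  // Log the extra outputs (beyond the first, which is the decision and is
  // logged at DecisionPos below) from the model's last evaluation.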
  for (size_t I = 1; I < OutputCount; ++I) {
    const auto &Result = *MUTR->lastEvaluationResult();
    const char *RawData =
        reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
    L->logSpecifiedTensorValue(CurrentFeature, RawData);
    ++CurrentFeature;
  }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
  L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logInt64Reward(Event.Reward);

  // For debugging / later use
  Effects.push_back(Event.Effect);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  L->flush(OutFile);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (isLogging())
    Logger->print();
}

Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
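  // Pure inference with no logging: the base advisor's advice is sufficient.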
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  append_range(InputSpecs, TrainingOnlyFeatures);
  if (auto MaybeOutSpecs =
          loadOutputSpecs(Ctx, DecisionName, ModelPath, TFOutputSpecOverride))
    OutputSpecs = std::move(*MaybeOutSpecs);
  else
    return;

  Evaluator = std::make_unique<TFModelEvaluator>(
      ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
      OutputSpecs.size());
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  LastEvaluationResult = Evaluator->evaluate();
  if (!LastEvaluationResult.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
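  // By convention the first output tensor holds the inlining decision (see the
  // output_spec.json description above).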
  int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  ModelUnderTrainingRunner *MUTRPtr = nullptr;
  bool IsDoingInference = false;
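  // Without a model under training, only capture features so the default
  // policy's decisions can be logged; otherwise load the model and run
  // inference with it.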
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    MUTRPtr = MUTR.get();
    Runner = std::move(MUTR);
  }
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr);

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
      std::move(Logger));
}
#endif // defined(LLVM_HAVE_TF_API)