//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using TensorFlow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/ADT/BitVector.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

- "tensor_spec", followed by the TensorSpec description of the
output; and
- "logging_name", a string indicating the name to use when
logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
    }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

namespace {
/// An InlineEvent, used by TrainingLogger.
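/// For illustration only (hypothetical numbers): an event where the default
/// policy would not have inlined, the model advised inlining, inlining
/// succeeded, and the estimated native size shrank by 8 would be recorded as
/// {DefaultDecision=0, AdvisedDecision=1, Effect=true, Reward=-8}.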
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise, it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  BitVector Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these two clearly out of bounds, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster when starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous training
/// iteration. That model may internally apply some small random perturbation
/// to its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation: the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than taking
/// the release candidate model, compiling it in 'release' mode, validating it,
/// and then potentially discarding it, it's easier to just pass the model to
/// the compiler as a one-off, albeit with slower compilation. Once the model
/// behaves satisfactorily, it can be compiled AOT, for efficiency, in release
/// mode. The expectation is that a well-trained model provides a good policy
/// over a sufficiently diverse codebase, over many changes (i.e. training
/// happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        Optional<size_t> CallerSizeEstimateBefore,
                        Optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const Optional<size_t> CallerSizeEstimateBefore;
  const Optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const bool Mandatory;
};

static const std::vector<TensorSpec> TrainingOnlyFeatures{
    TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
    TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};

static std::vector<TensorSpec> getInputFeatures() {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
        TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
  append_range(InputSpecs, TrainingOnlyFeatures);
  return InputSpecs;
}
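
// For example (the concrete names come from MLInlineAdvisor's FeatureMap and
// are shown here for illustration only): with the default "action_" prefix,
// the feeds would be named like "action_callee_basic_block_count", followed
// by the training-only feeds "action_inlining_default", "action_discount",
// "action_reward", and "action_step_type".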

} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  if (MUTR)
    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
  std::vector<LoggedFeatureSpec> FT;

  for (size_t I = 0; I < NumberOfFeatures; ++I)
    FT.push_back({FeatureMap.at(I), None});
  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
    append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));

  DefaultDecisionPos = FT.size();
  FT.push_back(
      {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});

  DecisionPos = FT.size();
  FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});

  L = std::make_unique<Logger>(
      FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}
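
// The resulting column layout, as constructed above: the NumberOfFeatures
// input features first, then any extra model outputs beyond the decision
// (when MUTR is available), then DefaultDecisionName, then DecisionName; the
// reward is logged separately, and only when the size estimator is requested.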

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  // Log the input features observed by the model runner.
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
    int64_t F = *ModelRunner.getTensor<int64_t>(CurrentFeature);
    L->logInt64Value(CurrentFeature, &F);
  }

  // Log the extra model outputs, i.e. everything past the first output
  // (which is the decision itself).
  for (size_t I = 1; I < OutputCount; ++I) {
    const auto &Result = *MUTR->lastEvaluationResult();
    const char *RawData =
        reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
    L->logSpecifiedTensorValue(CurrentFeature, RawData);
    ++CurrentFeature;
  }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
  L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logInt64Reward(Event.Reward);

  // For debugging / later use.
  Effects.push_back(Event.Effect);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  L->flush(OutFile);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice),
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (isLogging())
    Logger->print();
}

Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation =
      IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
                       : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
  else
    Runner = ModelUnderTrainingRunner::createAndEnsureValid(
        Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
        TFOutputSpecOverride);
  if (!Runner)
    return nullptr;
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(
        TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
}
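
// Note how the flags map onto the modes documented on
// DevelopmentModeMLInlineAdvisor: no model + a log is mode 1, model + log is
// mode 2, and model + no log is mode 3. Passing neither trips the assert in
// the advisor's constructor, since there would be nothing to do.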
#endif // defined(LLVM_HAVE_TF_API)