//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using TensorFlow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#include "llvm/Support/Casting.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/ADT/BitVector.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

- "tensor_spec", followed by the TensorSpec description of the
output; and
- "logging_name", a string indicating the name to use when
logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
      }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

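// A typical invocation wiring these flags together might look like the
// following (the development-mode advisor itself is selected elsewhere,
// typically via -enable-ml-inliner=development; paths are placeholders):
//
//   opt -passes='default<O2>' -enable-ml-inliner=development \
//       -ml-inliner-model-under-training=<savedmodel-dir> \
//       -training-log=<log-file> input.ll -S -o /dev/null
//
// Leaving out -ml-inliner-model-under-training logs the default policy's
// decisions instead; leaving out -training-log disables logging entirely.
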
namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
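///
/// Conceptually, each logged event is one row whose columns are, in order:
/// the feature values fed to the model runner, any additional outputs of the
/// model under training (when more than one output is logged), the default
/// policy's decision, the advised decision, and - when the size estimator is
/// requested - the reward.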
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  BitVector Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
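///
/// In terms of the flags above (see getDevelopmentModeAdvisor below), this
/// roughly maps to:
/// 1) -training-log set, no -ml-inliner-model-under-training: a
/// NoInferenceModelRunner only captures features, and the default policy's
/// decisions are logged;
/// 2) both -training-log and -ml-inliner-model-under-training set: the model
/// under training drives the decisions and they are logged;
/// 3) only -ml-inliner-model-under-training set: inference without logging.
/// At least one of the two flags must be provided (see the assert in the
/// constructor).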
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        Optional<size_t> CallerSizeEstimateBefore,
                        Optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
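    // Sentinel reward; it is only replaced by a real size delta when the size
    // estimator is available and inlining was not force-stopped.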
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const Optional<size_t> CallerSizeEstimateBefore;
  const Optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

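// Extra feeds that only exist on the model under training (presumably added
// by the TF-Agents training setup); they are appended to the regular features
// so that all of the trained SavedModel's inputs are accounted for.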
static const std::vector<TensorSpec> TrainingOnlyFeatures{
    TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
    TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};

static const std::vector<TensorSpec> getInputFeatures() {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  append_range(InputSpecs, TrainingOnlyFeatures);
  return InputSpecs;
}

} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  if (MUTR)
    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
  std::vector<LoggedFeatureSpec> FT;

  for (size_t I = 0; I < NumberOfFeatures; ++I)
    FT.push_back(
        {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
    append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));

  DefaultDecisionPos = FT.size();
  FT.push_back(
      {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});

  DecisionPos = FT.size();
  FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});

  L = std::make_unique<Logger>(
      FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
    int64_t F = *ModelRunner.getTensor<int64_t>(CurrentFeature);
    L->logInt64Value(CurrentFeature, &F);
  }

  for (size_t I = 1; I < OutputCount; ++I) {
    const auto &Result = *MUTR->lastEvaluationResult();
    const char *RawData =
        reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
    L->logSpecifiedTensorValue(CurrentFeature, RawData);
    ++CurrentFeature;
  }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
  L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logInt64Reward(Event.Reward);

  // For debugging / later use
  Effects.push_back(Event.Effect);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  L->flush(OutFile);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice),
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (isLogging())
    Logger->print();
}

Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

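// Mandatory call sites still get a LoggingMLInlineAdvice so that the native
// size bookkeeping in the record*Impl callbacks runs, but log() drops them,
// so they never appear in the training log.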
std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation =
      IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
                       : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
  else
    Runner = ModelUnderTrainingRunner::createAndEnsureValid(
        Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
        TFOutputSpecOverride);
  if (!Runner)
    return nullptr;
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(
        TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));
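  // Note: with no model under training, the dyn_cast above yields nullptr, so
  // the logger does not reserve columns for extra model outputs.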

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
}
#endif // defined(LLVM_HAVE_TF_API)
