1 //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements feature and label extraction for offline supervised learning
// of an IR to native size model.
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
15
16 #ifdef LLVM_HAVE_TF_API
17 #include "llvm/Analysis/Utils/TFUtils.h"
18 #endif
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/Dominators.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/MC/MCAsmLayout.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/raw_ostream.h"
31
32 #include <algorithm>
33 #include <deque>
34
35 using namespace llvm;
36
// Unique identity token for this analysis in the new pass manager; its
// address is what FunctionAnalysisManager keys results on.
AnalysisKey InlineSizeEstimatorAnalysis::Key;
38
39 #define DEBUG_TYPE "inline-size-estimator"
40
41 #ifdef LLVM_HAVE_TF_API
// Command-line option holding the path to the saved TensorFlow model. When
// empty (the default), the evaluator is not constructed and the analysis
// returns None (see isEvaluatorRequested / the constructor below).
cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model", cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR."));
45
46 namespace {
// Returns the largest instruction opcode number. Instruction.def expands the
// LAST_OTHER_INST(NR) marker exactly once, for the final opcode, so the
// include below supplies the function's single `return NR;` statement.
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}
51
// Feature extractor for the IR-to-native-size model. Produces a flat feature
// vector made of the named features below, an instruction-opcode histogram,
// and a histogram over selected opcode successions.
class IRToNativeSizeLearning {
public:
  // Indices of the scalar ("named") features within NamedFeatures.
  // NumNamedFeatures must remain last: it is used as the count.
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,

    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
  // The extracted features for one function, plus the static description of
  // which opcode pairs are tracked (shared by all instances).
  struct FunctionFeatures {
    static std::vector<std::pair<size_t, size_t>>
        ImportantInstructionSuccessions;
    // Total flattened feature count; defined after the table below.
    static const size_t FeatureCount;

    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;

    // Serializes all features, in declaration order, into Ptr (which must
    // point at at least FeatureCount int32_t slots).
    void fillTensor(int32_t *Ptr) const;
    // Convenient read/write access to a named feature.
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };
  IRToNativeSizeLearning() = default;

  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);

private:
  /// Sort once the feature tuples. Sorting fixes the order of the pair
  /// features, so it must happen before any pair index is computed.
  struct SortFeatureTuples {
    bool IsSorted = false;
    SortFeatureTuples() {
      std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(),
                FunctionFeatures::ImportantInstructionSuccessions.end());
      IsSorted = true;
    }
  };

  // ManagedStatic: constructed (and thus sorted) lazily on first dereference.
  static llvm::ManagedStatic<SortFeatureTuples> TupleSorter;

  // Dereferencing TupleSorter triggers the one-time sort above.
  static bool ensureSortedTuples() { return TupleSorter->IsSorted; }
};
llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples>
    IRToNativeSizeLearning::TupleSorter;
105
// This is a point in time - we determined including these pairs of
// consecutive instructions (in the IR layout available at inline time) as
// features improves the model performance. We want to move away from manual
// feature selection.
// The vector is given in opcode pairs rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand-extracted.
// NOTE: this initial order is not meaningful; SortFeatureTuples sorts the
// vector once, and the sorted position of each pair is its feature index.
std::vector<std::pair<size_t, size_t>>
    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions =
        {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48},
         {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34},
         {1, 1},  {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34},
         {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56},
         {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31},
         {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1},
         {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34},
         {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39},
         {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29},
         {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30},
         {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56},
         {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28},
         {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53},
         {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2},
         {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56},
         {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1},
         {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32},
         {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24},
         {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22},
         {1, 27}, {49, 1}, {1, 8}, {56, 2}};
134
// We have: 9 calculated features (the features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
// For the latter 2, we build a histogram: we count the number of
// occurrences of each instruction opcode or succession of instructions,
// respectively.
// Note that instruction opcodes start from 1. For convenience, we also have an
// always 0 feature for the '0' opcode, hence the extra 1.
// This total must match the layout written by fillTensor().
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions
        .size() +
    getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures;
146
getSize(Function & F,TargetTransformInfo & TTI)147 size_t getSize(Function &F, TargetTransformInfo &TTI) {
148 size_t Ret = 0;
149 for (auto &BB : F)
150 for (auto &I : BB)
151 Ret += TTI.getInstructionCost(
152 &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize);
153 return Ret;
154 }
155
getSize(Function & F,FunctionAnalysisManager & FAM)156 size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
157 auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
158 return getSize(F, TTI);
159 }
160
getMaxDominatorTreeDepth(const Function & F,const DominatorTree & Tree)161 unsigned getMaxDominatorTreeDepth(const Function &F,
162 const DominatorTree &Tree) {
163 unsigned Ret = 0;
164 for (auto &BB : F)
165 if (auto *TN = Tree.getNode(&BB))
166 Ret = std::max(Ret, TN->getLevel());
167 return Ret;
168 }
169 } // namespace
170
171 IRToNativeSizeLearning::FunctionFeatures
getFunctionFeatures(Function & F,FunctionAnalysisManager & FAM)172 IRToNativeSizeLearning::getFunctionFeatures(Function &F,
173 FunctionAnalysisManager &FAM) {
174 assert(ensureSortedTuples() && "expected lazy initialization");
175
176 auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
177 FunctionFeatures FF;
178 size_t InstrCount = getMaxInstructionID() + 1;
179 FF.InstructionHistogram.resize(InstrCount);
180
181 FF.InstructionPairHistogram.resize(
182 FunctionFeatures::ImportantInstructionSuccessions.size());
183
184 auto StartID = 0;
185 auto LastID = StartID;
186 auto getPairIndex = [](size_t a, size_t b) {
187 auto I =
188 std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(),
189 FunctionFeatures::ImportantInstructionSuccessions.end(),
190 std::make_pair(a, b));
191 if (I == FunctionFeatures::ImportantInstructionSuccessions.end())
192 return -1;
193 return static_cast<int>(std::distance(
194 FunctionFeatures::ImportantInstructionSuccessions.begin(), I));
195 };
196
197 // We don't want debug calls, because they'd just add noise.
198 for (auto &BB : F) {
199 for (auto I = BB.instructionsWithoutDebug().begin(),
200 E = BB.instructionsWithoutDebug().end();
201 I != E; ++I) {
202 auto ID = I->getOpcode();
203
204 ++FF.InstructionHistogram[ID];
205 int PairIndex = getPairIndex(LastID, ID);
206 if (PairIndex >= 0)
207 ++FF.InstructionPairHistogram[PairIndex];
208 LastID = ID;
209 if (isa<CallBase>(*I))
210 ++FF[NamedFeatureIndex::Calls];
211 }
212 }
213
214 FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
215 FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
216 FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
217 FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
218 FF[NamedFeatureIndex::Blocks] =
219 std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
220 auto &LI = FAM.getResult<LoopAnalysis>(F);
221 FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
222 for (auto &L : LI)
223 FF[NamedFeatureIndex::MaxLoopDepth] =
224 std::max(FF[NamedFeatureIndex::MaxLoopDepth],
225 static_cast<int32_t>(L->getLoopDepth()));
226 FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
227 return FF;
228 }
229
fillTensor(int32_t * Ptr) const230 void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
231 std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
232 Ptr += NamedFeatures.size();
233 std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
234 Ptr += InstructionHistogram.size();
235 std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
236 Ptr);
237 }
238
isEvaluatorRequested()239 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
240 return !TFIR2NativeModelPath.empty();
241 }
242
InlineSizeEstimatorAnalysis()243 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
244 if (!isEvaluatorRequested()) {
245 return;
246 }
247 std::vector<std::string> InputNames{"serving_default_input_1"};
248 std::vector<std::string> OutputName{"StatefulPartitionedCall"};
249 Evaluator = std::make_unique<TFModelEvaluator>(
250 TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName);
251 if (!Evaluator || !Evaluator->isValid()) {
252 Evaluator.reset();
253 return;
254 }
255 static const std::vector<int64_t> Dim{
256 1, static_cast<int64_t>(
257 IRToNativeSizeLearning::FunctionFeatures::FeatureCount)};
258
259 Evaluator->initInput<int32_t>(0, Dim);
260 }
261
262 InlineSizeEstimatorAnalysis::Result
run(const Function & F,FunctionAnalysisManager & FAM)263 InlineSizeEstimatorAnalysis::run(const Function &F,
264 FunctionAnalysisManager &FAM) {
265 if (!Evaluator)
266 return None;
267 auto Features = IRToNativeSizeLearning::getFunctionFeatures(
268 const_cast<Function &>(F), FAM);
269 int32_t *V = Evaluator->getInput<int32_t>(0);
270 Features.fillTensor(V);
271 auto ER = Evaluator->evaluate();
272 if (!ER)
273 return None;
274 float Ret = *ER->getTensorValue<float>(0);
275 if (Ret < 0.0)
276 Ret = 0.0;
277 return static_cast<size_t>(Ret);
278 }
279
~InlineSizeEstimatorAnalysis()280 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis && Other)281 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
282 InlineSizeEstimatorAnalysis &&Other)
283 : Evaluator(std::move(Other.Evaluator)) {}
284
#else
// Stubs used when LLVM is built without the TensorFlow C API
// (LLVM_HAVE_TF_API undefined): the analysis exists but always returns None.
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&) {}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  return None;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
#endif