/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17 #include "Loader.h"
18 #include "LoaderUtils.h"
19
20 #include "glow/Base/Image.h"
21 #include "glow/Graph/Graph.h"
22 #include "glow/Graph/Nodes.h"
23 #include "glow/Graph/Utils.h"
24 #include "glow/Quantization/Serialization.h"
25 #include "glow/Support/Support.h"
26
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/FileSystem.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/raw_ostream.h"
31
32 #include <chrono>
33 #include <fstream>
34 #include <memory>
35 #include <sstream>
36
37 using namespace glow;
38
39 namespace {
40
41 /// Model Tuner options
42 llvm::cl::OptionCategory modelTunerCat("Model Tuner Options");
43
44 llvm::cl::opt<std::string> datasetFileOpt(
45 "dataset-file", llvm::cl::Required,
46 llvm::cl::desc("Path to the dataset description file which contains on "
47 "each line a file path and an integer label separated by "
48 "space or comma. The integer labels start with 0 (0,1,..)."
49 "An example might look like this:\n"
50 " image0.png 0 \n"
51 " image1.png 13 \n"
52 " ............. \n"
53 "Another example might look like this:\n"
54 " image0.png,0, \n"
55 " image1.png,13, \n"
56 " ............. \n"),
57 llvm::cl::value_desc("file.txt|file.csv"), llvm::cl::cat(modelTunerCat));
58
59 llvm::cl::opt<std::string> datasetPathOpt(
60 "dataset-path", llvm::cl::Required,
61 llvm::cl::desc("The path of the directory where the dataset entries are "
62 "located."),
63 llvm::cl::value_desc("directory path"), llvm::cl::cat(modelTunerCat));
64
65 llvm::cl::opt<std::string> dumpTunedProfileFileOpt(
66 "dump-tuned-profile",
67 llvm::cl::desc("Output quantization profile obtained after tuning."),
68 llvm::cl::value_desc("profile_output.yaml"), llvm::cl::Required,
69 llvm::cl::cat(modelTunerCat));
70
71 llvm::cl::opt<float> targetAccuracyOpt(
72 "target-accuracy",
73 llvm::cl::desc("Stop the quantization tuning/calibration procedure when \n"
74 "the accuracy has reached or surpassed the given value. \n"
75 "A float value between 0.0 and 1.0 is expected. If not \n"
76 "specified, the tuning will run until completion. "),
77 llvm::cl::value_desc("float"), llvm::cl::Optional, llvm::cl::init(1.0),
78 llvm::cl::cat(modelTunerCat));
79
80 llvm::cl::opt<unsigned> maxIterPerNodeOpt(
81 "max-iter-per-node",
82 llvm::cl::desc("Maximum number of tuning iterations per node (default 3)."),
83 llvm::cl::value_desc("int"), llvm::cl::Optional, llvm::cl::init(3),
84 llvm::cl::cat(modelTunerCat));
85
86 llvm::cl::opt<float> accDropSkipOpt(
87 "acc-drop-skip",
88 llvm::cl::desc("The accuracy drop for which the tuning of any node is \n"
89 "skipped. The default value is 0.05 (5%)."),
90 llvm::cl::value_desc("float"), llvm::cl::Optional, llvm::cl::init(0.05),
91 llvm::cl::cat(modelTunerCat));
92 } // namespace
93
94 /// Get maximum confidence class (index and value) for the model output.
getOutputClass(Tensor * T)95 static std::pair<unsigned, float> getOutputClass(Tensor *T) {
96 CHECK(T->getElementType() == ElemKind::FloatTy)
97 << "Model output is expected to be float!";
98 auto TH = T->getHandle<float>();
99 float maxVal = TH.raw(0);
100 unsigned maxIdx = 0;
101 for (unsigned idx = 1; idx < TH.size(); ++idx) {
102 if (TH.raw(idx) > maxVal) {
103 maxVal = TH.raw(idx);
104 maxIdx = idx;
105 }
106 }
107 return std::make_pair(maxIdx, maxVal);
108 }
109
110 /// Function to run the model using the given \p dataset and compute the
111 /// accuracy. If \p quantize flag is given then the model is additionally
112 /// quantized using the profiling information \p pInfos.
runModelAndGetAccuracy(LabeledDataSet & dataset,bool quantize,std::vector<NodeProfilingInfo> & pInfos)113 float runModelAndGetAccuracy(LabeledDataSet &dataset, bool quantize,
114 std::vector<NodeProfilingInfo> &pInfos) {
115
116 // Initialize the loader object.
117 Loader loader;
118
119 // Load the model.
120 loader.loadModel();
121
122 // Allocate tensors for all placeholders.
123 PlaceholderBindings bindings;
124 bindings.allocate(loader.getModule()->getPlaceholders());
125
126 // Get input/output placeholders.
127 auto inpPHMap = loader.getInputPlaceholderMap();
128 auto outPHMap = loader.getOutputPlaceholderMap();
129 CHECK(inpPHMap.size() == 1) << "Model is expected to have only 1 input!";
130 CHECK(outPHMap.size() == 1) << "Model is expected to have only 1 output!";
131 Placeholder *input = inpPHMap.begin()->second;
132 Placeholder *output = outPHMap.begin()->second;
133
134 // Get compilation options.
135 CompilationContext cctx;
136 if (quantize) {
137 // Get compilation options for quantization.
138 cctx = loader.getCompilationContext(QuantizationMode::Quantize);
139 // Force the given profiling infos.
140 cctx.precisionConfig.quantConfig.infos = pInfos;
141 } else {
142 // Get compilation options for running the model as-is.
143 cctx = loader.getCompilationContext(QuantizationMode::None);
144 }
145 cctx.bindings = &bindings;
146
147 // Compile the function.
148 loader.compile(cctx);
149
150 // Run the function for all the dataset.
151 size_t correct = 0;
152 for (const auto &data : dataset) {
153 // Read the image and preprocess.
154 Tensor inputImg = readPngImageAndPreprocess(data.first, imageNormMode,
155 imageChannelOrder, imageLayout);
156 auto imgShape = inputImg.getType().dims();
157 Tensor inputTensor =
158 inputImg.getUnowned({1, imgShape[0], imgShape[1], imgShape[2]});
159 updateInputPlaceholders(*cctx.bindings, {input}, {&inputTensor});
160 // Run inference.
161 loader.runInference(*cctx.bindings, 1);
162 // Get output class.
163 auto cls = getOutputClass(cctx.bindings->get(output));
164 if (cls.first == data.second) {
165 ++correct;
166 }
167 }
168
169 // Compute accuracy.
170 return ((float)correct) / dataset.size();
171 }
172
173 /// Function to tune a given tensor for the given function with the given
174 /// dataset.
tuneQuantizationForTensor(std::vector<NodeProfilingInfo> & pInfos,LabeledDataSet & dataset,unsigned qIdx,float bestAcc)175 float tuneQuantizationForTensor(std::vector<NodeProfilingInfo> &pInfos,
176 LabeledDataSet &dataset, unsigned qIdx,
177 float bestAcc) {
178
179 // Tuning parameters.
180 unsigned maxIterPerNode = maxIterPerNodeOpt;
181 float accDropSkip = accDropSkipOpt;
182
183 // Backup profiling parameters for this tensor.
184 auto bestTPP = pInfos[qIdx].tensorProfilingParams_;
185
186 // Get tensor average value.
187 float tensorAvgVal = quantization::getTensorAverageValue(bestTPP);
188
189 // Get quantization configuration.
190 auto quantConfig = Loader::getQuantizationConfiguration();
191
192 // Run the tune iterations for this tensor.
193 for (unsigned iterIdx = 0; iterIdx < maxIterPerNode; ++iterIdx) {
194
195 // Get current min/max range.
196 float rangeMin = pInfos[qIdx].tensorProfilingParams_.min;
197 float rangeMax = pInfos[qIdx].tensorProfilingParams_.max;
198
199 // Skip tuning for this tensor if range is empty.
200 if (rangeMin == rangeMax) {
201 llvm::outs() << " Tuning skipped for this tensor: not required\n";
202 break;
203 }
204
205 // Get testing min/max range by repeatedly shrinking with a factor of 2.
206 float testMin, testMax;
207 if (quantConfig.schema == quantization::Asymmetric) {
208 // Shrink tensor min/max range around average value.
209 testMin = tensorAvgVal - (tensorAvgVal - rangeMin) / 2.0;
210 testMax = tensorAvgVal + (rangeMax - tensorAvgVal) / 2.0;
211 } else if (quantConfig.schema == quantization::Symmetric ||
212 quantConfig.schema == quantization::SymmetricWithUnsigned ||
213 quantConfig.schema == quantization::SymmetricWithPower2Scale) {
214 // Shrink tensor min/max range around 0.
215 float rangeAbsMin = std::abs(rangeMin);
216 float rangeAbsMax = std::abs(rangeMax);
217 float rangeAbs = rangeAbsMax > rangeAbsMin ? rangeAbsMax : rangeAbsMin;
218 testMin = -rangeAbs / 2.0f;
219 testMax = +rangeAbs / 2.0f;
220 } else {
221 llvm_unreachable("Quantization schema not supported!");
222 }
223
224 // Set the testing range.
225 pInfos[qIdx].tensorProfilingParams_.min = testMin;
226 pInfos[qIdx].tensorProfilingParams_.max = testMax;
227 llvm::outs() << strFormat(" [%d/%d] Testing range = [%.4f, %.4f]\n",
228 iterIdx + 1, maxIterPerNode, testMin, testMax);
229
230 // Quantize model and compute accuracy for current params.
231 float currAcc = runModelAndGetAccuracy(dataset, true, pInfos);
232 llvm::outs() << strFormat(" Accuracy = %.4f %%\n", currAcc * 100);
233
234 // If we obtain EXACTLY the same accuracy then the profiling parameters
235 // of this tensor have no side effects (most probably are not used).
236 if (currAcc == bestAcc) {
237 llvm::outs()
238 << " Tuning stopped for this tensor: accuracy not improved\n";
239 break;
240 }
241
242 // If current accuracy is better then save the profiling parameters.
243 if (currAcc > bestAcc) {
244 bestAcc = currAcc;
245 bestTPP = pInfos[qIdx].tensorProfilingParams_;
246 }
247
248 // If the current accuracy drops below the best accuracy with a given delta
249 // then skip the tuning for the current tensor.
250 bool lastIter = (iterIdx == (maxIterPerNode - 1));
251 if (!lastIter && (currAcc < (bestAcc - accDropSkip))) {
252 llvm::outs() << " Tuning stopped for this tensor: accuracy dropped more "
253 "than \"acc-drop-skip\"\n";
254 break;
255 }
256 }
257
258 // Save best profiling parameters for this tensor.
259 pInfos[qIdx].tensorProfilingParams_ = bestTPP;
260 llvm::outs() << strFormat("Best accuracy : %.4f %%\n", bestAcc * 100);
261 return bestAcc;
262 }
263
main(int argc,char ** argv)264 int main(int argc, char **argv) {
265
266 // Parse command line parameters. All the options will be available as part of
267 // the loader object.
268 parseCommandLine(argc, argv);
269
270 // Get the input profile used for tuning.
271 auto quantConfig = Loader::getQuantizationConfiguration();
272 CHECK(quantConfig.infos.size())
273 << "Input profile not found. Use the -load-profile option!";
274 auto pInfosTune = quantConfig.infos;
275 int tensorQNum = pInfosTune.size();
276
277 // Read tuning dataset.
278 LabeledDataSet datasetTune =
279 readLabeledDataSet(datasetFileOpt, datasetPathOpt);
280
281 // Set output stream to unbuffered state to flush every time.
282 llvm::outs().SetUnbuffered();
283
284 // Compute initial accuracy.
285 llvm::outs() << strFormat("\nComputing initial accuracy ... \n");
286 float accValF = runModelAndGetAccuracy(datasetTune, false, pInfosTune);
287 float accValQ = runModelAndGetAccuracy(datasetTune, true, pInfosTune);
288 llvm::outs() << strFormat("Initial accuracy: %.4f %% (FLOAT)\n",
289 accValF * 100);
290 llvm::outs() << strFormat("Initial accuracy: %.4f %% (QUANTIZED)\n",
291 accValQ * 100);
292 llvm::outs() << strFormat("Target accuracy: %.4f %% (QUANTIZED)\n",
293 targetAccuracyOpt * 100);
294 llvm::outs() << strFormat("Number of tensors: %d\n\n", tensorQNum);
295
296 // Perform tuning for all tunable tensors.
297 float accVal = accValQ;
298 auto startTime = getTimeStamp();
299 for (int tensorQIdx = 0; tensorQIdx < tensorQNum; ++tensorQIdx) {
300
301 // Stop tuning if target accuracy is achieved.
302 if (accVal > targetAccuracyOpt) {
303 llvm::outs() << "Target accuracy achieved! Tuning is stopped ...\n";
304 break;
305 }
306
307 // Tune the quantization for this tensor.
308 auto tensorName = pInfosTune[tensorQIdx].nodeOutputName_.data();
309 llvm::outs() << strFormat("[%d/%d] Tuning quantization for tensor \"%s\"\n",
310 tensorQIdx + 1, tensorQNum, tensorName);
311 accVal =
312 tuneQuantizationForTensor(pInfosTune, datasetTune, tensorQIdx, accVal);
313
314 // Display estimated remaining time and stats.
315 unsigned iterSec = getDurationSec(startTime) / (tensorQIdx + 1);
316 unsigned remSec = iterSec * (tensorQNum - tensorQIdx - 1);
317 unsigned remMin = (remSec / 60) % 60;
318 unsigned remHrs = (remSec / 60) / 60;
319 llvm::outs() << strFormat("Iteration time: %d seconds\n", iterSec);
320 llvm::outs() << strFormat("Remaining time: %d hours %d minutes\n\n", remHrs,
321 remMin);
322 }
323
324 // Print final accuracy.
325 llvm::outs() << strFormat("\nFinal accuracy: %.4f %% (QUANTIZED)\n\n",
326 accVal * 100);
327
328 // Print total time.
329 unsigned totSec, totMin, totHrs;
330 getDuration(startTime, totSec, totMin, totHrs);
331 llvm::outs() << strFormat("Total time: %d hours %d minutes\n\n", totHrs,
332 totMin);
333
334 // Serialize the tuned output profile.
335 serializeProfilingInfosToYaml(dumpTunedProfileFileOpt,
336 quantConfig.graphPreLowerHash, pInfosTune);
337
338 return 0;
339 }
340