1 /**
2  * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "Loader.h"
18 #include "LoaderUtils.h"
19 
20 #include "glow/Base/Image.h"
21 #include "glow/Graph/Graph.h"
22 #include "glow/Graph/Nodes.h"
23 #include "glow/Graph/Utils.h"
24 #include "glow/Quantization/Serialization.h"
25 #include "glow/Support/Support.h"
26 
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/FileSystem.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/raw_ostream.h"
31 
32 #include <chrono>
33 #include <fstream>
34 #include <memory>
35 #include <sstream>
36 
37 using namespace glow;
38 
namespace {

/// Model Tuner options
llvm::cl::OptionCategory modelTunerCat("Model Tuner Options");

/// Required path to the dataset description file: one "<image> <label>"
/// (space- or comma-separated) entry per line, integer labels starting at 0.
llvm::cl::opt<std::string> datasetFileOpt(
    "dataset-file", llvm::cl::Required,
    llvm::cl::desc("Path to the dataset description file which contains on "
                   "each line a file path and an integer label separated by "
                   "space or comma. The integer labels start with 0 (0,1,..)."
                   "An example might look like this:\n"
                   "  image0.png 0   \n"
                   "  image1.png 13  \n"
                   "  .............  \n"
                   "Another example might look like this:\n"
                   "  image0.png,0,  \n"
                   "  image1.png,13, \n"
                   "  .............  \n"),
    llvm::cl::value_desc("file.txt|file.csv"), llvm::cl::cat(modelTunerCat));

/// Required directory prefix for the file paths listed in the dataset file.
llvm::cl::opt<std::string> datasetPathOpt(
    "dataset-path", llvm::cl::Required,
    llvm::cl::desc("The path of the directory where the dataset entries are "
                   "located."),
    llvm::cl::value_desc("directory path"), llvm::cl::cat(modelTunerCat));

/// Required output path where the tuned quantization profile is serialized
/// as YAML at the end of the run.
llvm::cl::opt<std::string> dumpTunedProfileFileOpt(
    "dump-tuned-profile",
    llvm::cl::desc("Output quantization profile obtained after tuning."),
    llvm::cl::value_desc("profile_output.yaml"), llvm::cl::Required,
    llvm::cl::cat(modelTunerCat));

/// Optional early-stop threshold in [0.0, 1.0]: tuning stops once the
/// quantized accuracy reaches this value (default 1.0 = run to completion).
llvm::cl::opt<float> targetAccuracyOpt(
    "target-accuracy",
    llvm::cl::desc("Stop the quantization tuning/calibration procedure when \n"
                   "the accuracy has reached or surpassed the given value.  \n"
                   "A float value between 0.0 and 1.0 is expected. If not   \n"
                   "specified, the tuning will run until completion. "),
    llvm::cl::value_desc("float"), llvm::cl::Optional, llvm::cl::init(1.0),
    llvm::cl::cat(modelTunerCat));

/// Maximum number of range-shrinking iterations tried per tensor (default 3).
llvm::cl::opt<unsigned> maxIterPerNodeOpt(
    "max-iter-per-node",
    llvm::cl::desc("Maximum number of tuning iterations per node (default 3)."),
    llvm::cl::value_desc("int"), llvm::cl::Optional, llvm::cl::init(3),
    llvm::cl::cat(modelTunerCat));

/// Accuracy drop (relative to the best accuracy seen so far) beyond which the
/// tuning of the current tensor is abandoned early (default 0.05 = 5%).
llvm::cl::opt<float> accDropSkipOpt(
    "acc-drop-skip",
    llvm::cl::desc("The accuracy drop for which the tuning of any node is \n"
                   "skipped. The default value is 0.05 (5%)."),
    llvm::cl::value_desc("float"), llvm::cl::Optional, llvm::cl::init(0.05),
    llvm::cl::cat(modelTunerCat));
} // namespace
93 
94 /// Get maximum confidence class (index and value) for the model output.
getOutputClass(Tensor * T)95 static std::pair<unsigned, float> getOutputClass(Tensor *T) {
96   CHECK(T->getElementType() == ElemKind::FloatTy)
97       << "Model output is expected to be float!";
98   auto TH = T->getHandle<float>();
99   float maxVal = TH.raw(0);
100   unsigned maxIdx = 0;
101   for (unsigned idx = 1; idx < TH.size(); ++idx) {
102     if (TH.raw(idx) > maxVal) {
103       maxVal = TH.raw(idx);
104       maxIdx = idx;
105     }
106   }
107   return std::make_pair(maxIdx, maxVal);
108 }
109 
110 /// Function to run the model using the given \p dataset and compute the
111 /// accuracy. If \p quantize flag is given then the model is additionally
112 /// quantized using the profiling information \p pInfos.
runModelAndGetAccuracy(LabeledDataSet & dataset,bool quantize,std::vector<NodeProfilingInfo> & pInfos)113 float runModelAndGetAccuracy(LabeledDataSet &dataset, bool quantize,
114                              std::vector<NodeProfilingInfo> &pInfos) {
115 
116   // Initialize the loader object.
117   Loader loader;
118 
119   // Load the model.
120   loader.loadModel();
121 
122   // Allocate tensors for all placeholders.
123   PlaceholderBindings bindings;
124   bindings.allocate(loader.getModule()->getPlaceholders());
125 
126   // Get input/output placeholders.
127   auto inpPHMap = loader.getInputPlaceholderMap();
128   auto outPHMap = loader.getOutputPlaceholderMap();
129   CHECK(inpPHMap.size() == 1) << "Model is expected to have only 1 input!";
130   CHECK(outPHMap.size() == 1) << "Model is expected to have only 1 output!";
131   Placeholder *input = inpPHMap.begin()->second;
132   Placeholder *output = outPHMap.begin()->second;
133 
134   // Get compilation options.
135   CompilationContext cctx;
136   if (quantize) {
137     // Get compilation options for quantization.
138     cctx = loader.getCompilationContext(QuantizationMode::Quantize);
139     // Force the given profiling infos.
140     cctx.precisionConfig.quantConfig.infos = pInfos;
141   } else {
142     // Get compilation options for running the model as-is.
143     cctx = loader.getCompilationContext(QuantizationMode::None);
144   }
145   cctx.bindings = &bindings;
146 
147   // Compile the function.
148   loader.compile(cctx);
149 
150   // Run the function for all the dataset.
151   size_t correct = 0;
152   for (const auto &data : dataset) {
153     // Read the image and preprocess.
154     Tensor inputImg = readPngImageAndPreprocess(data.first, imageNormMode,
155                                                 imageChannelOrder, imageLayout);
156     auto imgShape = inputImg.getType().dims();
157     Tensor inputTensor =
158         inputImg.getUnowned({1, imgShape[0], imgShape[1], imgShape[2]});
159     updateInputPlaceholders(*cctx.bindings, {input}, {&inputTensor});
160     // Run inference.
161     loader.runInference(*cctx.bindings, 1);
162     // Get output class.
163     auto cls = getOutputClass(cctx.bindings->get(output));
164     if (cls.first == data.second) {
165       ++correct;
166     }
167   }
168 
169   // Compute accuracy.
170   return ((float)correct) / dataset.size();
171 }
172 
173 /// Function to tune a given tensor for the given function with the given
174 /// dataset.
tuneQuantizationForTensor(std::vector<NodeProfilingInfo> & pInfos,LabeledDataSet & dataset,unsigned qIdx,float bestAcc)175 float tuneQuantizationForTensor(std::vector<NodeProfilingInfo> &pInfos,
176                                 LabeledDataSet &dataset, unsigned qIdx,
177                                 float bestAcc) {
178 
179   // Tuning parameters.
180   unsigned maxIterPerNode = maxIterPerNodeOpt;
181   float accDropSkip = accDropSkipOpt;
182 
183   // Backup profiling parameters for this tensor.
184   auto bestTPP = pInfos[qIdx].tensorProfilingParams_;
185 
186   // Get tensor average value.
187   float tensorAvgVal = quantization::getTensorAverageValue(bestTPP);
188 
189   // Get quantization configuration.
190   auto quantConfig = Loader::getQuantizationConfiguration();
191 
192   // Run the tune iterations for this tensor.
193   for (unsigned iterIdx = 0; iterIdx < maxIterPerNode; ++iterIdx) {
194 
195     // Get current min/max range.
196     float rangeMin = pInfos[qIdx].tensorProfilingParams_.min;
197     float rangeMax = pInfos[qIdx].tensorProfilingParams_.max;
198 
199     // Skip tuning for this tensor if range is empty.
200     if (rangeMin == rangeMax) {
201       llvm::outs() << "  Tuning skipped for this tensor: not required\n";
202       break;
203     }
204 
205     // Get testing min/max range by repeatedly shrinking with a factor of 2.
206     float testMin, testMax;
207     if (quantConfig.schema == quantization::Asymmetric) {
208       // Shrink tensor min/max range around average value.
209       testMin = tensorAvgVal - (tensorAvgVal - rangeMin) / 2.0;
210       testMax = tensorAvgVal + (rangeMax - tensorAvgVal) / 2.0;
211     } else if (quantConfig.schema == quantization::Symmetric ||
212                quantConfig.schema == quantization::SymmetricWithUnsigned ||
213                quantConfig.schema == quantization::SymmetricWithPower2Scale) {
214       // Shrink tensor min/max range around 0.
215       float rangeAbsMin = std::abs(rangeMin);
216       float rangeAbsMax = std::abs(rangeMax);
217       float rangeAbs = rangeAbsMax > rangeAbsMin ? rangeAbsMax : rangeAbsMin;
218       testMin = -rangeAbs / 2.0f;
219       testMax = +rangeAbs / 2.0f;
220     } else {
221       llvm_unreachable("Quantization schema not supported!");
222     }
223 
224     // Set the testing range.
225     pInfos[qIdx].tensorProfilingParams_.min = testMin;
226     pInfos[qIdx].tensorProfilingParams_.max = testMax;
227     llvm::outs() << strFormat("  [%d/%d] Testing range = [%.4f, %.4f]\n",
228                               iterIdx + 1, maxIterPerNode, testMin, testMax);
229 
230     // Quantize model and compute accuracy for current params.
231     float currAcc = runModelAndGetAccuracy(dataset, true, pInfos);
232     llvm::outs() << strFormat("  Accuracy = %.4f %%\n", currAcc * 100);
233 
234     // If we obtain EXACTLY the same accuracy then the profiling parameters
235     // of this tensor have no side effects (most probably are not used).
236     if (currAcc == bestAcc) {
237       llvm::outs()
238           << "  Tuning stopped for this tensor: accuracy not improved\n";
239       break;
240     }
241 
242     // If current accuracy is better then save the profiling parameters.
243     if (currAcc > bestAcc) {
244       bestAcc = currAcc;
245       bestTPP = pInfos[qIdx].tensorProfilingParams_;
246     }
247 
248     // If the current accuracy drops below the best accuracy with a given delta
249     // then skip the tuning for the current tensor.
250     bool lastIter = (iterIdx == (maxIterPerNode - 1));
251     if (!lastIter && (currAcc < (bestAcc - accDropSkip))) {
252       llvm::outs() << "  Tuning stopped for this tensor: accuracy dropped more "
253                       "than \"acc-drop-skip\"\n";
254       break;
255     }
256   }
257 
258   // Save best profiling parameters for this tensor.
259   pInfos[qIdx].tensorProfilingParams_ = bestTPP;
260   llvm::outs() << strFormat("Best accuracy : %.4f %%\n", bestAcc * 100);
261   return bestAcc;
262 }
263 
/// Entry point: loads an input quantization profile, measures the baseline
/// float and quantized accuracies on the tuning dataset, then greedily tunes
/// the profiling range of each tensor in turn and serializes the resulting
/// tuned profile to YAML.
int main(int argc, char **argv) {

  // Parse command line parameters. All the options will be available as part of
  // the loader object.
  parseCommandLine(argc, argv);

  // Get the input profile used for tuning (must be loaded via -load-profile).
  auto quantConfig = Loader::getQuantizationConfiguration();
  CHECK(quantConfig.infos.size())
      << "Input profile not found. Use the -load-profile option!";
  auto pInfosTune = quantConfig.infos;
  int tensorQNum = pInfosTune.size();

  // Read tuning dataset (file paths + integer labels).
  LabeledDataSet datasetTune =
      readLabeledDataSet(datasetFileOpt, datasetPathOpt);

  // Set output stream to unbuffered state to flush every time.
  llvm::outs().SetUnbuffered();

  // Compute initial accuracy for both the float and the quantized model.
  llvm::outs() << strFormat("\nComputing initial accuracy ... \n");
  float accValF = runModelAndGetAccuracy(datasetTune, false, pInfosTune);
  float accValQ = runModelAndGetAccuracy(datasetTune, true, pInfosTune);
  llvm::outs() << strFormat("Initial accuracy: %.4f %% (FLOAT)\n",
                            accValF * 100);
  llvm::outs() << strFormat("Initial accuracy: %.4f %% (QUANTIZED)\n",
                            accValQ * 100);
  llvm::outs() << strFormat("Target  accuracy: %.4f %% (QUANTIZED)\n",
                            targetAccuracyOpt * 100);
  llvm::outs() << strFormat("Number of tensors: %d\n\n", tensorQNum);

  // Perform tuning for all tunable tensors, one tensor at a time.
  float accVal = accValQ;
  auto startTime = getTimeStamp();
  for (int tensorQIdx = 0; tensorQIdx < tensorQNum; ++tensorQIdx) {

    // Stop tuning if target accuracy is achieved.
    if (accVal > targetAccuracyOpt) {
      llvm::outs() << "Target accuracy achieved! Tuning is stopped ...\n";
      break;
    }

    // Tune the quantization for this tensor; accVal carries the best accuracy
    // found so far across tensors.
    auto tensorName = pInfosTune[tensorQIdx].nodeOutputName_.data();
    llvm::outs() << strFormat("[%d/%d] Tuning quantization for tensor \"%s\"\n",
                              tensorQIdx + 1, tensorQNum, tensorName);
    accVal =
        tuneQuantizationForTensor(pInfosTune, datasetTune, tensorQIdx, accVal);

    // Display estimated remaining time and stats, extrapolated from the
    // average per-tensor duration so far.
    unsigned iterSec = getDurationSec(startTime) / (tensorQIdx + 1);
    unsigned remSec = iterSec * (tensorQNum - tensorQIdx - 1);
    unsigned remMin = (remSec / 60) % 60;
    unsigned remHrs = (remSec / 60) / 60;
    llvm::outs() << strFormat("Iteration time: %d seconds\n", iterSec);
    llvm::outs() << strFormat("Remaining time: %d hours %d minutes\n\n", remHrs,
                              remMin);
  }

  // Print final accuracy.
  llvm::outs() << strFormat("\nFinal accuracy: %.4f %% (QUANTIZED)\n\n",
                            accVal * 100);

  // Print total time.
  unsigned totSec, totMin, totHrs;
  getDuration(startTime, totSec, totMin, totHrs);
  llvm::outs() << strFormat("Total time: %d hours %d minutes\n\n", totHrs,
                            totMin);

  // Serialize the tuned output profile so it can be used for quantization.
  serializeProfilingInfosToYaml(dumpTunedProfileFileOpt,
                                quantConfig.graphPreLowerHash, pInfosTune);

  return 0;
}
340