/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "BackendTestUtils.h"

#include "glow/Converter/TypeAToTypeBFunctionConverter.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Graph/Graph.h"
#include "glow/IR/IR.h"
#include "glow/IR/IRBuilder.h"
#include "glow/IR/Instrs.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Quantization/Quantization.h"

#include "gtest/gtest.h"

#include "llvm/Support/CommandLine.h"

#include <future>

namespace glow {

llvm::cl::OptionCategory backendTestUtilsCat("BackendTestUtils Category");

unsigned parCloneCountOpt;
llvm::cl::opt<unsigned, /* ExternalStorage */ true> parCloneCountI(
    "parallel-clone-count",
    llvm::cl::desc(
        "Number of times to clone a graph in parallel. Intended to stress test "
        "different backends. This option is not used by all unit "
        "tests; check the test to see whether it applies."),
    llvm::cl::location(parCloneCountOpt), llvm::cl::Optional, llvm::cl::init(1),
    llvm::cl::cat(backendTestUtilsCat));

bool runDisabledTests;
llvm::cl::opt<bool, /* ExternalStorage */ true> runDisabledTestsI(
    "run-disabled-tests",
    llvm::cl::desc("If set, disabled tests will not be skipped."),
    llvm::cl::location(runDisabledTests), llvm::cl::Optional,
    llvm::cl::init(false), llvm::cl::cat(backendTestUtilsCat));

using llvm::cast;

namespace {
// Helpers for creating and initializing placeholders from tensors.
static Placeholder *createPlaceholder(Module &mod,
                                      PlaceholderBindings &bindings,
                                      Tensor *tensor, llvm::StringRef name,
                                      const std::string layout = ANY_LAYOUT) {
  auto *P = mod.createPlaceholder(tensor->getElementType(), tensor->dims(),
                                  name, false, layout);
  auto *PTensor = bindings.allocate(P);
  PTensor->assign(tensor);

  return P;
}

static Placeholder *createQuantizedPlaceholder(Module &mod,
                                               PlaceholderBindings &bindings,
                                               Tensor *tensor, float scale,
                                               int32_t offset,
                                               llvm::StringRef name) {
  auto *P = mod.createPlaceholder(tensor->getElementType(), tensor->dims(),
                                  scale, offset, name, false);
  auto *PTensor = bindings.allocate(P);
  PTensor->assign(tensor);

  return P;
}

/// Create and initialize a function using the argument
/// \p createAndInitFunction, then run the function in profiling mode to get
/// the profiling parameters. \p count is the number of times to clone the
/// Function inside itself before profiling. \returns the profiling parameters
/// for all the function nodes.
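/// A minimal usage sketch, assuming a CreateAndInitFunction callback such as
/// createAndInitBasicFCNet (defined later in this file):
/// \code
///   auto infos = profileAndGetNodeProfilingInfo(createAndInitBasicFCNet,
///                                               /* count */ 1);
/// \endcode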
static std::vector<NodeProfilingInfo>
profileAndGetNodeProfilingInfo(CreateAndInitFunction createAndInitFunction,
                               unsigned count) {
  LoweredInfoMap loweredMapForProf;
  PlaceholderBindings pBindings;
  // Note: deviceMemory = 0 is a signal to use the defaultMemory.
  ExecutionEngine PEE{"Interpreter", /* deviceMemory */ 0,
                      /* ignoreUserDeviceConfig */ true};
  auto FT = createAndInitFunction(pBindings, PEE);
  CompilationContext cctx{&pBindings, &loweredMapForProf};

  // Clone the Function as many times as requested, to match the Function that
  // will be quantized.
  cloneFunInsideFun(FT, &pBindings, cctx, count);
  cctx.precisionConfig.quantMode = QuantizationMode::Profile;
  PEE.compile(cctx);
  PEE.run(pBindings);

  // We get the new function using front() because the original function was
  // deleted as part of the Partitioner quantization flow.
  return quantization::generateNodeProfilingInfos(
      pBindings, PEE.getModule().getFunctions().front(), loweredMapForProf);
}

/// Helper that sets up and \returns a pair of configs for both interpreter and
/// backend being tested.
static std::pair<CompilationContext, CompilationContext>
setupInterpAndBackendConfigs(
    Function *IF, ExecutionEngine &IEE, PlaceholderBindings &iBindings,
    LoweredInfoMap &ILIM, PlaceholderBindings &bBindings, LoweredInfoMap &BLIM,
    ElemKind interpElemKind, ElemKind backendElemKind,
    quantization::Schema schema, bool convertToRowwiseQuantization,
    CreateAndInitFunction createAndInitFunction, ElemKind biasElemKind,
    bool forceFP16AccumSLS, PrecisionConfiguration::Float16Format float16Format,
    unsigned count, bool convertToChannelwiseQuantization,
    bool skipQuantizeFCBias) {
  CompilationContext cctxI{&iBindings, &ILIM};
  CompilationContext cctxB{&bBindings, &BLIM};
  PrecisionConfiguration &precConfigI = cctxI.precisionConfig;
  PrecisionConfiguration &precConfigB = cctxB.precisionConfig;

  if (isQuantizedElemKind(interpElemKind) ||
      isQuantizedElemKind(backendElemKind)) {
    // If either the interpreter or the backend needs to be quantized, then we
    // need to profile and get quantization infos.
    if (isQuantizedElemKind(interpElemKind)) {
      // Note: We only do parallel cloning for the backend, so always use count
      // of 1 here.
      auto NQII =
          profileAndGetNodeProfilingInfo(createAndInitFunction, /* count */ 1);

      precConfigI.quantMode = QuantizationMode::Quantize;
      precConfigI.quantConfig.infos = NQII;
      precConfigI.quantConfig.enableRowwise = convertToRowwiseQuantization;
      precConfigI.quantConfig.enableChannelwise =
          convertToChannelwiseQuantization;
      precConfigI.quantConfig.schema = schema;
      precConfigI.quantConfig.precision = interpElemKind;
      precConfigI.quantConfig.assertAllNodesQuantized = true;
      precConfigI.quantConfig.precisionBias = biasElemKind;
      precConfigI.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias;
    }

    if (isQuantizedElemKind(backendElemKind)) {
      // Always clone count times here. This matches the Function the backend
      // will quantize.
      auto NQIB = profileAndGetNodeProfilingInfo(createAndInitFunction, count);

      precConfigB.quantMode = QuantizationMode::Quantize;
      precConfigB.quantConfig.infos = NQIB;
      precConfigB.quantConfig.enableRowwise = convertToRowwiseQuantization;
      precConfigB.quantConfig.enableChannelwise =
          convertToChannelwiseQuantization;
      precConfigB.quantConfig.schema = schema;
      precConfigB.quantConfig.precision = backendElemKind;
      precConfigB.quantConfig.assertAllNodesQuantized = true;
      precConfigB.quantConfig.precisionBias = biasElemKind;
      precConfigB.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias;
    }
  }

  // For now, if the ElemKind is Float16Ty then we convert both regular types
  // (to Float16Ty) and fused types (to UInt8FusedFP16QTy).
  precConfigI.convertToFP16 = interpElemKind == ElemKind::Float16Ty;
  precConfigI.convertFusedToFP16 = interpElemKind == ElemKind::Float16Ty;
  precConfigI.forceFP16AccumSLS = forceFP16AccumSLS;
  precConfigB.convertToFP16 = backendElemKind == ElemKind::Float16Ty;
  precConfigB.convertFusedToFP16 = backendElemKind == ElemKind::Float16Ty;
  precConfigB.forceFP16AccumSLS = forceFP16AccumSLS;

  return std::make_pair(cctxI, cctxB);
}
} // namespace

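/// Dispatches inference of function \p fname on \p hostManager, cloning
/// \p context as needed to issue \p concurrentRequestsOpt concurrent requests.
/// A minimal usage sketch, assuming a HostManager that already has a network
/// named "main" added and a context with its input placeholders bound:
/// \code
///   ExecutionContext context;
///   // ... allocate and fill input placeholders in context ...
///   dispatchInference("main", hostManager.get(), context,
///                     /* concurrentRequestsOpt */ 4);
/// \endcode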
void dispatchInference(const std::string &fname,
                       runtime::HostManager *hostManager,
                       ExecutionContext &context,
                       unsigned concurrentRequestsOpt) {
  // If additional requests are desired, set up additional contexts.
  std::vector<std::unique_ptr<ExecutionContext>> contexts;
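  // Temporarily wrap the caller's context in a unique_ptr so all contexts can
  // live in one vector; ownership is handed back via release() at the end.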
  std::unique_ptr<ExecutionContext> originalContextPtr(&context);
  contexts.push_back(std::move(originalContextPtr));
  if (concurrentRequestsOpt > 1) {
    // Clone the placeholder bindings into a new executionContext.
    for (unsigned i = 0, max = concurrentRequestsOpt - 1; i < max; i++) {
      std::unique_ptr<ExecutionContext> newContext =
          glow::make_unique<ExecutionContext>(
              glow::make_unique<PlaceholderBindings>(
                  context.getPlaceholderBindings()->clone()));
      contexts.push_back(std::move(newContext));
    }
  }
  std::vector<std::promise<void>> promises(concurrentRequestsOpt);
  std::vector<std::future<void>> futures;
  for (auto &promise : promises) {
    futures.push_back(promise.get_future());
  }
  for (unsigned i = 0; i < concurrentRequestsOpt; i++) {
    hostManager->runNetwork(fname, std::move(contexts[i]),
                            [&contexts, &promises,
                             i](runtime::RunIdentifierTy, Error err,
                                std::unique_ptr<ExecutionContext> contextPtr) {
                              contexts[i] = std::move(contextPtr);
                              // Expect no errors.
                              EXIT_ON_ERR(std::move(err));
                              promises[i].set_value();
                            });
  }

  for (auto &future : futures) {
    future.wait();
  }

  for (auto &c : contexts) {
    c->getPlaceholderBindings()->ensureOnHost();
  }
  // Release the original context passed in by reference so we don't free it.
  contexts[0].release();
}

/// Helper that iterates over all of the Placeholders from the function \p F
/// and converts the Tensors found in \p bindings to the same type as the
/// Placeholders if necessary.
static void convertBindingsToCorrectType(Function *F,
                                         PlaceholderBindings &bindings) {
  PlaceholderList PHs = F->findPlaceholders();
  for (Placeholder *PH : PHs) {
    Tensor *T = bindings.get(PH);
    TypeRef newTy = PH->getType();
    if (T->getType().isEqual(newTy)) {
      continue;
    }
    // For input placeholders convert tensor type and values.
    // For output placeholders convert only the tensor type.
    if (isInput(PH, *F)) {
      ElemKind newK = newTy->getElementType();
      if (isQuantizedElemKind(newK)) {
        Tensor QT = quantization::quantizeTensor(
            *T, {newTy->getScale(), newTy->getOffset()}, newK);
        T->assign(&QT);
      } else {
        T->convertToType(newK);
      }
    } else {
      T->reset(*newTy);
    }
  }
}

/// Helper to get a float copy of a Tensor \p T if needed.
static Tensor convertToFloatIfNecessary(Tensor &T) {
  const ElemKind srcK = T.getType().getElementType();
  if (srcK == ElemKind::FloatTy) {
    return T.clone();
  }
  if (isQuantizedElemKind(srcK)) {
    return quantization::dequantizeTensor(T, ElemKind::FloatTy);
  }
  return T.getCopyConvertedToType(ElemKind::FloatTy);
}

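/// Builds the network returned by \p createAndInitFunction on both the
/// Interpreter and \p backendName, runs both, and compares the results. A
/// minimal usage sketch from a hypothetical test, using createAndInitBasicFCNet
/// (defined later in this file) and relying on the declaration's default
/// arguments for the remaining parameters:
/// \code
///   compareAgainstInterpreter("CPU", createAndInitBasicFCNet,
///                             ElemKind::FloatTy, ElemKind::FloatTy,
///                             /* allowedError */ 0.0001f);
/// \endcode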
void compareAgainstInterpreter(
    llvm::StringRef backendName, CreateAndInitFunction createAndInitFunction,
    ElemKind interpElemKind, ElemKind backendElemKind, float allowedError,
    unsigned count, bool convertToRowwiseQuantization,
    quantization::Schema schema, ElemKind biasElemKind, bool forceFP16AccumSLS,
    PrecisionConfiguration::Float16Format float16Format,
    bool convertToChannelwiseQuantization, bool skipQuantizeFCBias) {
  // Note: deviceMemory = 0 is a signal to use the defaultMemory.
  ExecutionEngine IEE{"Interpreter", /* deviceMemory */ 0,
                      /* ignoreUserDeviceConfig */ true};
  ExecutionEngine BEE{backendName};
  PlaceholderBindings iBindings, bBindings;

  LOG(INFO) << "Comparing Interpreter with precision "
            << Type::getElementName(interpElemKind).str() << " against "
            << backendName.str() << " with precision "
            << Type::getElementName(backendElemKind).str() << " with Bias "
            << (skipQuantizeFCBias ? "unquantized"
                                   : Type::getElementName(biasElemKind).str())
            << " with FP16 AccumulationSLS " << forceFP16AccumSLS;

  // Create the same network on the interpreter and the backend being tested.
  FunctionTensorPair IFT = createAndInitFunction(iBindings, IEE);
  FunctionTensorPair BFT = createAndInitFunction(bBindings, BEE);

  Function *IF = IFT.first;

  // Set up the configs for interpreter and backend. If one or both Functions
  // will be quantized, then gather a profile of the graph on the interpreter,
  // and then quantize the Functions as requested.
  LoweredInfoMap ILIM, BLIM;
  auto configs = setupInterpAndBackendConfigs(
      IF, IEE, iBindings, ILIM, bBindings, BLIM, interpElemKind,
      backendElemKind, schema, convertToRowwiseQuantization,
      createAndInitFunction, biasElemKind, forceFP16AccumSLS, float16Format,
      count, convertToChannelwiseQuantization, skipQuantizeFCBias);
  CompilationContext &cctxI = configs.first;
  CompilationContext &cctxB = configs.second;

  // Skip conversion for rowwise quantized tests as they are a special case
  // which doesn't fit cleanly here -- e.g. RWQ-SLS has FloatTy outputs.
  if (!convertToRowwiseQuantization) {
    // We want to compare the ops themselves and not see differences in
    // conversion, so fold ElemKind conversion nodes into IO.
    cctxI.optimizationOpts.foldElemKindConversionIntoIO = true;
    cctxB.optimizationOpts.foldElemKindConversionIntoIO = true;
  }

  // Clone the Function inside itself many times if desired.
  std::unordered_set<Tensor *> resultTensors =
      cloneFunInsideFun(BFT, &bBindings, cctxB, count);
  assert(resultTensors.size() == count &&
         "Should get the same number of Tensors back as count.");

  IEE.compile(cctxI);
  BEE.compile(cctxB);

  // Again skip rowwise quantization as before.
  if (!convertToRowwiseQuantization) {
    // Now that we have compiled, precision transformation has occurred, so
    // convert all mismatches for Placeholders given their original bindings.
    convertBindingsToCorrectType(IEE.getSingleFunctionFromModule(), iBindings);
    convertBindingsToCorrectType(BEE.getSingleFunctionFromModule(), bBindings);
  }

  IEE.run(iBindings);
  BEE.run(bBindings);

  // Compare each of our result tensors to the original. Always convert back to
  // float if necessary, as the allowed error is expected to compare floats.
  Tensor finalIT = convertToFloatIfNecessary(*IFT.second);
  for (Tensor *T : resultTensors) {
    Tensor finalBT = convertToFloatIfNecessary(*T);
    EXPECT_TRUE(finalIT.isEqual(finalBT, allowedError, /* verbose */ true));
  }

  // Additionally check that each of the results from the parallel cloned
  // Functions are bitwise equal.
  auto it = resultTensors.begin();
  Tensor *firstResult = *it;
  for (it++; it != resultTensors.end(); it++) {
    EXPECT_TRUE(firstResult->isBitwiseEqual(**it));
  }
}

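/// Clones the Function in \p FTP into itself so the graph contains \p count
/// copies, duplicating Placeholders and profiling infos so the copies stay
/// independent. A minimal usage sketch, assuming \p FTP, \p bindings, and
/// \p cctx were produced as in compareAgainstInterpreter above:
/// \code
///   std::unordered_set<Tensor *> results =
///       cloneFunInsideFun(FTP, &bindings, cctx, /* count */ 4);
///   assert(results.size() == 4);
/// \endcode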
std::unordered_set<Tensor *> cloneFunInsideFun(FunctionTensorPair FTP,
                                               PlaceholderBindings *bindings,
                                               CompilationContext &cctx,
                                               unsigned count) {
  Function *origF = FTP.first;

  // Always save the original Function's Tensor, which we will keep around.
  std::unordered_set<Tensor *> resultTensors;
  resultTensors.insert(FTP.second);

  // Nothing to do if we just want the one.
  if (count == 1) {
    return resultTensors;
  }

  Module *mod = origF->getParent();

  // Clone the original Function so we can repeatedly add copies of it to the
  // original.
  auto *cloneF = origF->clone("single_clone");

  // We keep the original Function, then clone/add count-1 more.
  for (size_t i = 1; i < count; i++) {
    // Clone the clone, and then add all the new nodes to the original function.
    auto *tmpF = cloneF->clone("tmp" + std::to_string(i));
    std::unordered_set<Node *> clonedNodes;
    bool foundSaveNode = false;
    for (auto &N : tmpF->getNodes()) {
      clonedNodes.insert(&N);

      // For every Node we add, check if it uses a Placeholder node, and if so
      // clone it in the Module so that CSE doesn't undo all our hard work.
      for (size_t j = 0, f = N.getNumInputs(); j < f; j++) {
        Placeholder *origPH = llvm::dyn_cast<Placeholder>(N.getNthInput(j));
        if (!origPH) {
          continue;
        }

        // Clone the Placeholder, allocate it in the bindings, and replace the
        // usage of the original Placeholder so it points to the clone.
        Placeholder *clonePH = mod->createPlaceholder(
            origPH->getType(), origPH->getName(), origPH->isTraining());
        Tensor *oldT = bindings->get(origPH);
        assert(oldT);
        Tensor *newT = bindings->allocate(clonePH);
        newT->assign(oldT);
        N.setNthInput(j, clonePH);

        // Save the result Tensors to return so we can compare the results of
        // all of our clones.
        if (llvm::isa<SaveNode>(N)) {
          assert(!foundSaveNode &&
                 "Can only handle Functions with a single SaveNode.");
          foundSaveNode = true;
          resultTensors.insert(newT);
        }
      }
    }
    for (auto &N : clonedNodes) {
      origF->takeOwnershipOfNode(N);
    }
    mod->eraseFunction(tmpF);
  }
  // Now erase the clone we used to copy in, as it's no longer needed.
  mod->eraseFunction(cloneF);

  // Finally, duplicate all of the node profiling infos with the new expected
  // clone's name so that the cloned copies will find the same profiling info
  // as the original node if being quantized.
  auto &origInfos = cctx.precisionConfig.quantConfig.infos;
  origInfos.reserve(count * origInfos.size());
  std::vector<NodeProfilingInfo> newInfos;
  newInfos.reserve((count - 1) * origInfos.size());
  for (const auto &PI : origInfos) {
    const size_t colonIdx = PI.nodeOutputName_.find(":");
    assert(colonIdx != std::string::npos && "Name should always contain ':'");
    for (size_t i = 1; i < count; i++) {
      std::string newName(PI.nodeOutputName_);
      // Cloned nodes end up with the original name plus the clone number
      // appended to their name due to uniquing. Replicate the same thing.
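      // E.g. a profiling entry for "conv:0" gains siblings "conv1:0",
      // "conv2:0", ... for clones i = 1, 2, ...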
      newName.insert(colonIdx, std::to_string(i));
      newInfos.emplace_back(newName, PI.tensorProfilingParams_);
    }
  }
  origInfos.insert(origInfos.end(), newInfos.begin(), newInfos.end());

  return resultTensors;
}

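/// \returns the number of nodes in \p F whose kind is \p kind. For example, a
/// hypothetical check that an optimization left exactly one Convolution:
/// \code
///   EXPECT_EQ(countNodeKind(F, Kinded::Kind::ConvolutionNodeKind), 1);
/// \endcode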
unsigned countNodeKind(Function *F, Kinded::Kind kind) {
  unsigned count = 0;
  for (auto &n : F->getNodes()) {
    if (n.getKind() == kind) {
      count++;
    }
  }
  return count;
}

void inferIntLookupTableNet(Tensor *input, Tensor *out,
                            llvm::ArrayRef<int8_t> table,
                            llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto outTy = mod.uniqueType(ElemKind::Int8QTy, {(dim_t)input->size()}, 3, 3);
  auto var = createQuantizedPlaceholder(mod, bindings, input,
                                        input->getType().getScale(),
                                        input->getType().getOffset(), "var");
  auto *lookupTable = F->createIntLookupTable("lookuptable", var, table, outTy);
  auto *result = F->createSave("ret", lookupTable);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);
  bindings.allocate(mod.getPlaceholders());

  updateInputPlaceholders(bindings, {var}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferConvNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out,
                  llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  Placeholder *inputP;
  Placeholder *filterP;
  Placeholder *biasP;
  Placeholder *outP;
  TypeRef OT;
  if (inputs->getType().isQuantizedType()) {
    auto &outType = out->getType();
    auto &inType = inputs->getType();
    auto &filterType = filter->getType();
    auto &biasType = bias->getType();
    inputP = createQuantizedPlaceholder(
        mod, bindings, inputs, inType.getScale(), inType.getOffset(), "inputP");
    filterP =
        createQuantizedPlaceholder(mod, bindings, filter, filterType.getScale(),
                                   filterType.getOffset(), "filterP");
    biasP = createQuantizedPlaceholder(mod, bindings, bias, biasType.getScale(),
                                       biasType.getOffset(), "biasP");
    outP = createQuantizedPlaceholder(mod, bindings, out, outType.getScale(),
                                      outType.getOffset(), "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims(),
                                    outType.getScale(), outType.getOffset());
  } else {
    inputP = createPlaceholder(mod, bindings, inputs, "inputP");
    filterP = createPlaceholder(mod, bindings, filter, "filterP");
    biasP = createPlaceholder(mod, bindings, bias, "biasP");
    outP = createPlaceholder(mod, bindings, out, "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims());
  }
  auto *conv = F->createConv("conv", inputP, filterP, biasP, OT, 5, 3, 4, 1);
  auto *result = F->createSave("ret", conv, outP);
  auto *resultTensor = bindings.get(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {inputP, filterP, biasP},
                          {inputs, filter, bias});
  EE.run(bindings);
  out->assign(resultTensor);
}

void trainConvNet(Tensor *inputs, Tensor *kernel1, Tensor *bias1,
                  Tensor *kernel2, Tensor *bias2, Tensor *selected,
                  llvm::ArrayRef<dim_t> shape1, llvm::ArrayRef<dim_t> shape2,
                  Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.01;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *conv1 = F->createConv(bindings, "conv1", var1, 3, {5, 3}, {2, 1},
                                {2, 1, 2, 1}, 1);
    bindings.get(cast<Placeholder>(conv1->getFilter()))->assign(kernel1);
    bindings.get(cast<Placeholder>(conv1->getBias()))->assign(bias1);
    auto *reshape1 = F->createReshape("reshape1", conv1, shape1);
    auto *conv2 = F->createConv(bindings, "conv2", reshape1, 2, 2, 2, 0, 1);
    bindings.get(cast<Placeholder>(conv2->getFilter()))->assign(kernel2);
    bindings.get(cast<Placeholder>(conv2->getBias()))->assign(bias2);
    auto *reshape2 = F->createReshape("reshape2", conv2, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }

  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}

void inferLocalResponseNormalizationNet(Tensor *inputs, Tensor *out,
                                        llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var");
  auto *lrn = F->createLocalResponseNormalization("lrn", var, 5, 3.0, 0.5, 1.5);
  auto *result = F->createSave("ret", lrn);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

void trainLocalResponseNormalizationNet(Tensor *inputs, Tensor *weights,
                                        Tensor *bias, Tensor *selected,
                                        llvm::ArrayRef<dim_t> shape1,
                                        llvm::ArrayRef<dim_t> shape2,
                                        Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings, trainingBindings;
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines{&EEI, &EET};
  TrainingConfig TC;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.06;
  TC.momentum = 0.1;
  TC.L2Decay = 0.01;
  Placeholder *var1, *var2;
  std::string fName;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    Function *F = mod.createFunction("main");
    fName = F->getName();
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *lrn =
        F->createLocalResponseNormalization("lrn", reshape1, 2, 2.0, 0.5, 1.0);
    auto *reshape2 = F->createReshape("reshape2", lrn, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    auto *result = F->createSave("ret", softmax);
    bindings.allocate(result->getPlaceholder());
  }
  auto *TF = glow::differentiate(EET.getModule().getFunction(fName), TC);
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  bindings.clear();
  bindings.allocate(EEI.getModule().getPlaceholders());

  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(bindings);
  var1 = bindings.getPlaceholderByNameSlow("var1");
  var2 = bindings.getPlaceholderByNameSlow("var2");
  EEI.compile(CompilationMode::Infer);

  runBatch(EEI, bindings, 1, sampleCounter, {var1, var2}, {inputs, selected});
  out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret")));
}

void trainAvgPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines{&EEI, &EET};
  TrainingConfig TC;
  PlaceholderBindings bindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.01;
  TC.momentum = 0.4;
  TC.L2Decay = 0.01;
  Placeholder *var1, *var2;
  std::string fName;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    Function *F = mod.createFunction("main");
    fName = F->getName();
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *pool = F->createAvgPool("pool", reshape1, 2, 2, 0);
    auto *reshape2 = F->createReshape("reshape2", pool, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    auto *result = F->createSave("ret", softmax);
    bindings.allocate(result->getPlaceholder());
  }
  auto *TF = glow::differentiate(EET.getModule().getFunction("main"), TC);
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  bindings.clear();
  bindings.allocate(EEI.getModule().getPlaceholders());

  runBatch(EET, trainingBindings, 10, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(bindings);
  var1 = bindings.getPlaceholderByNameSlow("var1");
  var2 = bindings.getPlaceholderByNameSlow("var2");
  EEI.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2}, {inputs, selected});
  EEI.run(bindings);
  out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret")));
}

void trainMaxPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.003;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    bindings.clear();
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *pool = F->createMaxPool("pool", reshape1, 5, 3, 4);
    auto *reshape2 = F->createReshape("reshape2", pool->getResult(), shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }
  auto *TF = glow::differentiate(F, TC);
  auto fName = F->getName();
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  runBatch(EET, trainingBindings, 7, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  runBatch(EEI, inferBindings, 1, sampleCounter, {var1, var2},
           {inputs, selected}, fName);
  out->assign(res);
}

void inferSmallConv(Tensor *inputs, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");
  auto *in = createPlaceholder(mod, bindings, inputs, "in", "NHWC");
  auto *C = F->createConv(bindings, "conv2a", in, 64, 1, 1, 0, 1);
  bindings.get(cast<Placeholder>(C->getFilter()))->getHandle().clear(0.3);
  bindings.get(cast<Placeholder>(C->getBias()))->getHandle().clear(0.4);
  auto *result = F->createSave("ret", C);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());
  convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {in}, {inputs});
  EE.run(bindings);

  out->assign(resultTensor);
}

void inferGroupConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 16},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 16; j++) {
      FH.at({i, 0, 0, j}) = (i + j) / 100.0;
    }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();

  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 1, 128});

  ConvolutionNode *CN =
      F->createConv("Conv", input, filter, zeroBias, outTy, 1, 1, 0, 2);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquarePaddingConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 32; j++) {
      FH.at({i, 0, 0, j}) = (i + j) / 100.0;
    }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 4, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {1, 1}, {1, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquareKernelConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 2; j++)
      for (dim_t k = 0; k < 32; k++) {
        FH.at({i, j, 0, k}) = (i + j + k) / 100.0;
      }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 3, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {2, 1}, {1, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquareStrideConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 2; j++)
      for (dim_t k = 0; k < 32; k++) {
        FH.at({i, j, 0, k}) = (i + j + k) / 100.0;
      }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {2, 1}, {2, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferConvDKKC8(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {3, 3, 3, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 3 * 3 * 3 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {192, 3, 3, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  filterTensor->zero();
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 192; i++)
    for (dim_t j = 0; j < 3; j++)
      for (dim_t k = 0; k < 3; k++)
        for (dim_t l = 0; l < 32; l++) {
          FH.at({i, j, k, l}) = (i + j + k + l) / 200.0;
        }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {192}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {3, 3, 3, 192});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {3, 3}, {1, 1}, {1, 1, 1, 1}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void trainSoftMaxNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EEI(kind);
  ExecutionEngine EET(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.003;
  TC.momentum = 0.7;
  TC.L2Decay = 0.001;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *softmax = F->createSoftMax("softmax", fc, var2);
    F->createSave("ret", softmax);
  }

  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();

  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  runBatch(EET, trainingBindings, 30, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  EEI.compile(CompilationMode::Infer);
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}

void inferTanhConcatNet(Tensor *input1, Tensor *input2, Tensor *input3,
                        Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var1 = createPlaceholder(mod, bindings, input1, "var1");
  auto *var2 = createPlaceholder(mod, bindings, input2, "var2");
  auto *var3 = createPlaceholder(mod, bindings, input3, "var3");
  auto *T1 = F->createTanh("tanh1", var1);
  auto *T2 = F->createTanh("tanh2", var2);
  auto *T3 = F->createTanh("tanh3", var3);
  Node *C1 = F->createConcat("concat", {T1, T2}, 0);
  Node *C2 = F->createConcat("concat", {T2, T3, C1, T2}, 0);
  auto *result = F->createSave("ret", C2);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2, var3},
                          {input1, input2, input3});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferBasicConvNet(Tensor *inputs, Tensor *out, llvm::StringRef kind,
                       size_t convDepth) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var", "NCHW");
  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *conv = F->createConv(bindings, "conv", tr, convDepth, {5, 5}, {2, 2},
                             {1, 1, 1, 1}, 1);
  bindings.get(cast<Placeholder>(conv->getFilter()))->getHandle().clear(0.1);
  bindings.get(cast<Placeholder>(conv->getBias()))->getHandle().clear(0.2);
  auto *pool = F->createMaxPool("pool", conv, 2, 2, 0);
  auto *result = F->createSave("ret", pool->getResult());
  auto *resultTensor = bindings.allocate(result->getPlaceholder());
  convertPlaceholdersToConstants(F, bindings, {var, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

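/// Builds and initializes a simple float FC+ReLU net; its signature matches
/// the CreateAndInitFunction callback shape expected by
/// compareAgainstInterpreter and profileAndGetNodeProfilingInfo above.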
FunctionTensorPair createAndInitBasicFCNet(PlaceholderBindings &bindings,
                                           ExecutionEngine &EE) {
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto *var = mod.createPlaceholder(ElemKind::FloatTy, {2, 3, 16, 16}, "var",
                                    false, "NCHW");
  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *fc = F->createFullyConnected(bindings, "fc", tr, 16);
  auto *rl0 = F->createRELU("relu", fc);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", rl0, 8);
  auto *rl1 = F->createRELU("relu", fc2);
  bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.8);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.5);
  auto *result = F->createSave("ret", rl1);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  PseudoRNG PRNG;
  bindings.allocate(var)->getHandle().initXavier(1, PRNG);

  return std::make_pair(F, resultTensor);
}

void inferMixedNet(Tensor *inputs, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var", "NCHW");
  auto *selected =
      mod.createPlaceholder(ElemKind::Int64ITy, {2, 1}, "selected", false);

  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *fc = F->createFullyConnected(bindings, "fc", tr, 16);
  auto *th0 = F->createTanh("tanh", fc);
  auto *sg0 = F->createSigmoid("sig", fc);
  auto *A1 = F->createAdd("add", th0, sg0);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", A1, 16);

  auto *R = F->createRegression("reg", fc2, fc2);
  auto *SM = F->createSoftMax("SM", R, selected);
  auto *result = F->createSave("ret", SM);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.4);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(3.5);

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferComplexNet1(Tensor *inputs1, Tensor *inputs2, Tensor *inputs3,
                      Tensor *inputs4, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var1 = createPlaceholder(mod, bindings, inputs1, "var1");
  auto *var2 = createPlaceholder(mod, bindings, inputs2, "var2");
  auto *var3 = createPlaceholder(mod, bindings, inputs3, "var3");
  auto *var4 = createPlaceholder(mod, bindings, inputs4, "var4");
  auto *conv1 = F->createConv(bindings, "conv1", var1, 6, 4, 1, 2, 1);
  bindings.get(cast<Placeholder>(conv1->getFilter()))->getHandle().clear(0.5);
  bindings.get(cast<Placeholder>(conv1->getBias()))->getHandle().clear(0.7);
  auto *sigmoid1 = F->createSigmoid("sigmoid1", conv1);
  auto *fc1 = F->createFullyConnected(bindings, "fc1", var2, 2352);
  bindings.get(cast<Placeholder>(fc1->getWeights()))->getHandle().clear(0.6);
  auto *reshape1 = F->createReshape("reshape1", fc1, {8, 14, 28, 6}, "NHWC");
  auto *relu1 = F->createRELU("relu1", reshape1);
  auto *pool1 = F->createMaxPool("pool1", relu1, 2, 2, 1);
  auto *add = F->createAdd("add", sigmoid1, pool1->getResult());
  auto *tanh = F->createTanh("tanh", add);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", var3, 720);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.1);
  auto *reshape2 = F->createReshape("reshape2", fc2, {8, 8, 15, 6}, "NHWC");
  auto *mul = F->createMul("mul", tanh, reshape2);
  auto *sigmoid2 = F->createSigmoid("sigmoid2", mul);
  auto *conv2 = F->createConv(bindings, "conv2", sigmoid2, 7, 3, 2, 1, 1);
  bindings.get(cast<Placeholder>(conv2->getFilter()))->getHandle().clear(0.3);
  bindings.get(cast<Placeholder>(conv2->getBias()))->getHandle().clear(1.3);
  auto *reshape3 = F->createReshape("reshape3", conv2, {8, 8, 7, 4}, "NHWC");
  auto *sub = F->createSub("sub", reshape3, var4);
  auto *relu2 = F->createRELU("relu2", sub);
  auto *pool2 = F->createAvgPool("pool2", relu2, 3, 2, 1);
  auto *sigmoid3 = F->createSigmoid("sigmoid3", pool2);
  auto *result = F->createSave("ret", sigmoid3);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2, var3, var4},
                          {inputs1, inputs2, inputs3, inputs4});
  EE.run(bindings);
  out->assign(resultTensor);
}

namespace {
// Helper for initializing conv node filter/bias from input tensors.
static void initConv(PlaceholderBindings &bindings, ConvolutionNode *C,
                     Tensor &filter, Tensor &bias) {
  bindings.get(cast<Placeholder>(C->getFilter()))->assign(&filter);
  bindings.get(cast<Placeholder>(C->getBias()))->assign(&bias);
}
} // namespace

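// Builds a single ResNet-style bottleneck block (1x1 -> 3x3 -> 1x1 convs plus
// a skip connection) whose filters and biases are taken pairwise from
// weights, then runs inference on the backend named by kind.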
void inferTinyResnet(Tensor *input, Tensor *out, std::vector<Tensor> &weights,
                     llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *in = createPlaceholder(mod, bindings, input, "in", "NHWC");
  auto *conv1 = F->createConv(bindings, "conv1", in, 256, 1, 1, 0, 1);
  auto *conv2a = F->createConv(bindings, "conv2a", conv1, 64, 1, 1, 0, 1);
  auto *relu2a = F->createRELU("relu2a", conv2a);
  auto *conv2b = F->createConv(bindings, "conv2b", relu2a, 64, 3, 1, 1, 1);
  auto *relu2b = F->createRELU("relu2b", conv2b);
  auto *conv2c = F->createConv(bindings, "conv2c", relu2b, 256, 1, 1, 0, 1);
  auto *add = F->createAdd("add", conv2c, conv1);
  auto *relu = F->createRELU("res2a_relu", add);
  auto *result = F->createSave("ret", relu);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  initConv(bindings, conv1, weights[0], weights[1]);
  initConv(bindings, conv2a, weights[2], weights[3]);
  initConv(bindings, conv2b, weights[4], weights[5]);
  initConv(bindings, conv2c, weights[6], weights[7]);
  convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {in}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

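// Slices a 3D input four ways, recombines the slices with concats and adds,
// and extracts a final sub-volume; exercises Slice/Concat handling on the
// backend named by kind.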
void inferExtract3D(Tensor *input, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *inputs = createPlaceholder(mod, bindings, input, "inputs");

  auto *x1 = F->createSlice("ex1", inputs, {0, 5, 0}, {1, 100, 100});
  auto *x2 = F->createSlice("ex2", inputs, {1, 5, 0}, {2, 100, 100});
  auto *x3 = F->createSlice("ex3", inputs, {2, 5, 0}, {3, 100, 100});
  auto *x4 = F->createSlice("ex4", inputs, {3, 5, 0}, {4, 100, 100});

  auto *x12 = F->createConcat("x12", {x1, x2}, 1);
  auto *x34 = F->createConcat("x34", {x3, x4}, 1);
  auto *x13 = F->createConcat("x13", {x1, x3}, 1);
  auto *x24 = F->createConcat("x24", {x2, x4}, 1);

  auto *add1 = F->createAdd("add1", x12, x34);
  auto *add2 = F->createAdd("add2", x13, x24);
  auto *add3 = F->createAdd("add3", add1, add2);

  auto *e = F->createSlice("slice", add3, {0, 55, 50}, {1, 150, 100});
  auto *result = F->createSave("ret", e);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {inputs}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

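// Rescales a quantized input into a new quantization type, then chains Max
// nodes against two Splat constants; exercises quantized Rescale/Splat/Max on
// the backend named by kind.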
void inferMaxSplat(Tensor *input, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto T = mod.uniqueType(ElemKind::Int8QTy, input->getType().dims(),
                          2 * input->getType().getScale(),
                          -input->getType().getOffset());
  auto *var = createQuantizedPlaceholder(mod, bindings, input,
                                         input->getType().getScale(),
                                         input->getType().getOffset(), "var");
  auto *rescale = F->createRescaleQuantized("rescale", var, T);

  auto *splat1 = F->createSplat("splat1", T, 0.0);
  auto *splat2 = F->createSplat("splat2", T, 5.0);

  auto *max1 = F->createMax("max1", rescale, splat1);
  auto *max2 = F->createMax("max2", splat2, max1);

  auto *result = F->createSave("ret", max2);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

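// Registers an already-compiled function with a DeviceManager under the given
// name. addNetwork() is asynchronous, so a promise/future pair turns the
// completion callback into a synchronous wait; any error is fatal via
// EXIT_ON_ERR.
//
// A minimal usage sketch (hypothetical variables: compiledF is a
// std::unique_ptr<CompiledFunction> produced by Backend::compile(), DM is an
// initialized runtime::DeviceManager):
//
//   insertCompiledFunction("main", compiledF.get(), DM, &EE.getModule());
//   // ... then execute it via runOnDevice() below.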
void insertCompiledFunction(llvm::StringRef name, CompiledFunction *func,
                            runtime::DeviceManager *device, Module *mod) {
  runtime::FunctionMapTy functionMap;
  functionMap[name] = func;

  std::promise<void> addPromise;
  auto fut = addPromise.get_future();
  Error addErr = Error::empty();
  device->addNetwork(mod, std::move(functionMap),
                     [&addPromise, &addErr](const Module *, Error err) {
                       addErr = std::move(err);
                       addPromise.set_value();
                     });
  fut.wait();
  EXIT_ON_ERR(std::move(addErr));
}

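// Runs the function registered as name on device, blocking until the
// asynchronous runFunction() callback completes. The caller retains ownership
// of context: it is wrapped in a unique_ptr only because runFunction() takes
// one, and the callback release()s it so the caller's (possibly
// stack-allocated) context is never deleted here.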
void runOnDevice(ExecutionContext &context, llvm::StringRef name,
                 runtime::DeviceManager *device) {
  std::unique_ptr<ExecutionContext> contextPtr(&context);
  std::promise<void> runPromise;
  auto fut = runPromise.get_future();
  Error runErr = Error::empty();
  device->runFunction(
      name, std::move(contextPtr),
      [&runPromise, &runErr](runtime::RunIdentifierTy, Error err,
                             std::unique_ptr<ExecutionContext> contextPtr) {
        // Don't delete context.
        contextPtr.release();
        runErr = std::move(err);
        runPromise.set_value();
      });
  fut.wait();
  EXIT_ON_ERR(std::move(runErr));
}

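// Creates a Constant with the element type of type reshaped to dims, and
// randomizes its payload: Xavier initialization for float element types,
// full-range uniform values for the quantized integer types.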
Constant *createRandomizedConstant(Module &mod, TypeRef type,
                                   llvm::ArrayRef<dim_t> dims,
                                   llvm::StringRef name) {
  auto *c = mod.createConstant(mod.uniqueTypeWithNewShape(type, dims), name);

  switch (type->getElementType()) {
  case ElemKind::FloatTy: {
    c->getHandle<float>().initXavier(c->getType()->size() * 2, mod.getPRNG());
    break;
  }
  case ElemKind::Float16Ty: {
    c->getHandle<float16_t>().initXavier(c->getType()->size() * 2,
                                         mod.getPRNG());
    break;
  }
  case ElemKind::BFloat16Ty: {
    c->getHandle<bfloat16_t>().initXavier(c->getType()->size() * 2,
                                          mod.getPRNG());
    break;
  }
  case ElemKind::Int32QTy: {
    c->getHandle<int32_t>().randomize(INT32_MIN, INT32_MAX, mod.getPRNG());
    break;
  }
  case ElemKind::Int8QTy: {
    c->getHandle<int8_t>().randomize(INT8_MIN, INT8_MAX, mod.getPRNG());
    break;
  }
  case ElemKind::UInt8FusedQTy:
  case ElemKind::UInt8FusedFP16QTy: {
    c->getHandle<uint8_t>().randomize(UINT8_MIN, UINT8_MAX, mod.getPRNG());
    break;
  }
  default:
    LOG(FATAL) << "Unsupported type: " << type->getElementName().str();
  }

  return c;
}

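// Creates a fused rowwise-quantized Constant of logical shape dims. In the
// fused format each row carries its own scale and offset appended after the
// uint8 data (float16 pairs for UInt8FusedFP16QTy, float pairs otherwise),
// hence the allocated row width of dims[1] + 2 * sizeScaleOffset. With
// scale = 1/1275 and offset = -0.1, the uint8 range [0, 255] dequantizes to
// [-0.1, 0.1].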
Constant *createRandomFusedRowwiseQuantizedConstant(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16) {
  auto T = mod.uniqueType(
      (useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy),
      {1}, 1, 0);
  const dim_t sizeScaleOffset =
      useFusedFP16 ? sizeof(float16_t) : sizeof(float);
  Constant *c = createRandomizedConstant(
      mod, T, {dims[0], dims[1] + 2 * sizeScaleOffset}, name);

  // Range (0, 255) -> (-0.1, 0.1)
  constexpr float scale = 1.0f / 1275;
  constexpr float offset = -0.1;
  auto cH = c->getPayload().getHandle<uint8_t>();
  for (unsigned i = 0, e = c->dims()[0]; i < e; i++) {
    if (useFusedFP16) {
      cH.setFusedScaleOffsetInRow<float16_t>(i, scale, offset);
    } else {
      cH.setFusedScaleOffsetInRow<float>(i, scale, offset);
    }
  }

  return c;
}

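// Placeholder counterpart of createRandomFusedRowwiseQuantizedConstant():
// same fused rowwise layout and scale/offset, but the payload is left for the
// caller to allocate and initialize through PlaceholderBindings.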
Placeholder *createFusedRowwiseQuantizedPlaceholder(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16) {
  auto T = useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy;
  const dim_t sizeScaleOffset =
      useFusedFP16 ? sizeof(float16_t) : sizeof(float);
  constexpr float scale = 1.0f / 1275;
  constexpr float offset = -0.1;
  Placeholder *ph = mod.createPlaceholder(
      T, {dims[0], dims[1] + 2 * sizeScaleOffset}, scale, offset, name, false);

  return ph;
}
} // namespace glow