/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "BackendTestUtils.h"

#include "glow/Converter/TypeAToTypeBFunctionConverter.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Graph/Graph.h"
#include "glow/IR/IR.h"
#include "glow/IR/IRBuilder.h"
#include "glow/IR/Instrs.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Quantization/Quantization.h"

#include "gtest/gtest.h"

#include "llvm/Support/CommandLine.h"

#include <future>

namespace glow {

llvm::cl::OptionCategory backendTestUtilsCat("BackendTestUtils Category");

unsigned parCloneCountOpt;
llvm::cl::opt<unsigned, /* ExternalStorage */ true> parCloneCountI(
    "parallel-clone-count",
    llvm::cl::desc(
        "Number of times to clone a graph in parallel. Intended to stress "
        "test different backends. Note that not all unit tests use this "
        "option; check the test to see whether it does."),
    llvm::cl::location(parCloneCountOpt), llvm::cl::Optional, llvm::cl::init(1),
    llvm::cl::cat(backendTestUtilsCat));

bool runDisabledTests;
llvm::cl::opt<bool, /* ExternalStorage */ true> runDisabledTestsI(
    "run-disabled-tests",
    llvm::cl::desc("If set, disabled tests will not be skipped."),
    llvm::cl::location(runDisabledTests), llvm::cl::Optional,
    llvm::cl::init(false), llvm::cl::cat(backendTestUtilsCat));
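
// For example, a test binary built with these utilities might be invoked as:
//   ./BackendTest --parallel-clone-count=4 --run-disabled-tests
// (Illustrative invocation only; the actual binary name depends on the test
// target.)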

using llvm::cast;

namespace {
// Helpers for creating and initializing placeholders from tensors.
static Placeholder *createPlaceholder(Module &mod,
                                      PlaceholderBindings &bindings,
                                      Tensor *tensor, llvm::StringRef name,
                                      const std::string layout = ANY_LAYOUT) {
  auto *P = mod.createPlaceholder(tensor->getElementType(), tensor->dims(),
                                  name, false, layout);
  auto *PTensor = bindings.allocate(P);
  PTensor->assign(tensor);

  return P;
}

static Placeholder *createQuantizedPlaceholder(Module &mod,
                                               PlaceholderBindings &bindings,
                                               Tensor *tensor, float scale,
                                               int32_t offset,
                                               llvm::StringRef name) {
  auto *P = mod.createPlaceholder(tensor->getElementType(), tensor->dims(),
                                  scale, offset, name, false);
  auto *PTensor = bindings.allocate(P);
  PTensor->assign(tensor);

  return P;
}

/// Create and initialize a function using the argument
/// \p createAndInitFunction, then run the function in profiling mode to get
/// the profiling parameters. \p count is the number of times to clone the
/// Function inside itself before profiling. \returns the profiling parameters
/// for all the function nodes.
static std::vector<NodeProfilingInfo>
profileAndGetNodeProfilingInfo(CreateAndInitFunction createAndInitFunction,
                               unsigned count) {
  LoweredInfoMap loweredMapForProf;
  PlaceholderBindings pBindings;
  // Note: deviceMemory = 0 is a signal to use the defaultMemory.
  ExecutionEngine PEE{"Interpreter", /* deviceMemory */ 0,
                      /* ignoreUserDeviceConfig */ true};
  auto FT = createAndInitFunction(pBindings, PEE);
  CompilationContext cctx{&pBindings, &loweredMapForProf};

  // Clone the Function as many times as requested, to match the Function that
  // will be quantized.
  cloneFunInsideFun(FT, &pBindings, cctx, count);
  cctx.precisionConfig.quantMode = QuantizationMode::Profile;
  PEE.compile(cctx);
  PEE.run(pBindings);

  // We get the new function using front() because the original function was
  // deleted as part of the Partitioner quantization flow.
  return quantization::generateNodeProfilingInfos(
      pBindings, PEE.getModule().getFunctions().front(), loweredMapForProf);
}

/// Helper that sets up and \returns a pair of CompilationContexts, one for the
/// interpreter and one for the backend being tested.
static std::pair<CompilationContext, CompilationContext>
setupInterpAndBackendConfigs(
    Function *IF, ExecutionEngine &IEE, PlaceholderBindings &iBindings,
    LoweredInfoMap &ILIM, PlaceholderBindings &bBindings, LoweredInfoMap &BLIM,
    ElemKind interpElemKind, ElemKind backendElemKind,
    quantization::Schema schema, bool convertToRowwiseQuantization,
    CreateAndInitFunction createAndInitFunction, ElemKind biasElemKind,
    bool forceFP16AccumSLS, PrecisionConfiguration::Float16Format float16Format,
    unsigned count, bool convertToChannelwiseQuantization,
    bool skipQuantizeFCBias) {
  CompilationContext cctxI{&iBindings, &ILIM};
  CompilationContext cctxB{&bBindings, &BLIM};
  PrecisionConfiguration &precConfigI = cctxI.precisionConfig;
  PrecisionConfiguration &precConfigB = cctxB.precisionConfig;

  if (isQuantizedElemKind(interpElemKind) ||
      isQuantizedElemKind(backendElemKind)) {
    // If either the interpreter or the backend needs to be quantized then we
    // need to profile and get quantization infos.
    if (isQuantizedElemKind(interpElemKind)) {
      // Note: We only do parallel cloning for the backend, so always use count
      // of 1 here.
      auto NQII =
          profileAndGetNodeProfilingInfo(createAndInitFunction, /* count */ 1);

      precConfigI.quantMode = QuantizationMode::Quantize;
      precConfigI.quantConfig.infos = NQII;
      precConfigI.quantConfig.enableRowwise = convertToRowwiseQuantization;
      precConfigI.quantConfig.enableChannelwise =
          convertToChannelwiseQuantization;
      precConfigI.quantConfig.schema = schema;
      precConfigI.quantConfig.precision = interpElemKind;
      precConfigI.quantConfig.assertAllNodesQuantized = true;
      precConfigI.quantConfig.precisionBias = biasElemKind;
      precConfigI.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias;
    }

    if (isQuantizedElemKind(backendElemKind)) {
      // Always clone count times here. This matches the Function the backend
      // will quantize.
      auto NQIB = profileAndGetNodeProfilingInfo(createAndInitFunction, count);

      precConfigB.quantMode = QuantizationMode::Quantize;
      precConfigB.quantConfig.infos = NQIB;
      precConfigB.quantConfig.enableRowwise = convertToRowwiseQuantization;
      precConfigB.quantConfig.enableChannelwise =
          convertToChannelwiseQuantization;
      precConfigB.quantConfig.schema = schema;
      precConfigB.quantConfig.precision = backendElemKind;
      precConfigB.quantConfig.assertAllNodesQuantized = true;
      precConfigB.quantConfig.precisionBias = biasElemKind;
      precConfigB.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias;
    }
  }

  // For now, if the ElemKind is Float16Ty then we convert both regular types
  // (to Float16Ty) and fused types (to UInt8FusedFP16QTy).
  precConfigI.convertToFP16 = interpElemKind == ElemKind::Float16Ty;
  precConfigI.convertFusedToFP16 = interpElemKind == ElemKind::Float16Ty;
  precConfigI.forceFP16AccumSLS = forceFP16AccumSLS;
  precConfigB.convertToFP16 = backendElemKind == ElemKind::Float16Ty;
  precConfigB.convertFusedToFP16 = backendElemKind == ElemKind::Float16Ty;
  precConfigB.forceFP16AccumSLS = forceFP16AccumSLS;

  return std::make_pair(cctxI, cctxB);
}
} // namespace

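/// Runs the function \p fname on \p hostManager \p concurrentRequestsOpt times
/// concurrently, cloning the PlaceholderBindings in \p context for each
/// additional request, waits until all requests complete, and ensures the
/// resulting tensors are back on the host.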
void dispatchInference(const std::string &fname,
                       runtime::HostManager *hostManager,
                       ExecutionContext &context,
                       unsigned concurrentRequestsOpt) {
  // If additional requests are desired, set up additional contexts.
  std::vector<std::unique_ptr<ExecutionContext>> contexts;
  std::unique_ptr<ExecutionContext> originalContextPtr(&context);
  contexts.push_back(std::move(originalContextPtr));
  if (concurrentRequestsOpt > 1) {
    // Clone the placeholder bindings into a new executionContext.
    for (unsigned i = 0, max = concurrentRequestsOpt - 1; i < max; i++) {
      std::unique_ptr<ExecutionContext> newContext =
          glow::make_unique<ExecutionContext>(
              glow::make_unique<PlaceholderBindings>(
                  context.getPlaceholderBindings()->clone()));
      contexts.push_back(std::move(newContext));
    }
  }
  std::vector<std::promise<void>> promises(concurrentRequestsOpt);
  std::vector<std::future<void>> futures;
  for (auto &promise : promises) {
    futures.push_back(promise.get_future());
  }
  for (unsigned i = 0; i < concurrentRequestsOpt; i++) {
    hostManager->runNetwork(fname, std::move(contexts[i]),
                            [&contexts, &promises,
                             i](runtime::RunIdentifierTy, Error err,
                                std::unique_ptr<ExecutionContext> contextPtr) {
                              contexts[i] = std::move(contextPtr);
                              // Expect no errors.
                              EXIT_ON_ERR(std::move(err));
                              promises[i].set_value();
                            });
  }

  for (auto &future : futures) {
    future.wait();
  }

  for (auto &c : contexts) {
    c->getPlaceholderBindings()->ensureOnHost();
  }
  // Release the original context passed in by reference so we don't free it.
  contexts[0].release();
}

/// Helper that iterates over all of the Placeholders from the function \p F
/// and converts the Tensors found in \p bindings to the same type as the
/// Placeholders if necessary.
static void convertBindingsToCorrectType(Function *F,
                                         PlaceholderBindings &bindings) {
  PlaceholderList PHs = F->findPlaceholders();
  for (Placeholder *PH : PHs) {
    Tensor *T = bindings.get(PH);
    TypeRef newTy = PH->getType();
    if (T->getType().isEqual(newTy)) {
      continue;
    }
    // For input placeholders convert the tensor type and values.
    // For output placeholders convert only the tensor type.
    if (isInput(PH, *F)) {
      ElemKind newK = newTy->getElementType();
      if (isQuantizedElemKind(newK)) {
        Tensor QT = quantization::quantizeTensor(
            *T, {newTy->getScale(), newTy->getOffset()}, newK);
        T->assign(&QT);
      } else {
        T->convertToType(newK);
      }
    } else {
      T->reset(*newTy);
    }
  }
}

/// Helper to get a float copy of a Tensor \p T if needed.
static Tensor convertToFloatIfNecessary(Tensor &T) {
  const ElemKind srcK = T.getType().getElementType();
  if (srcK == ElemKind::FloatTy) {
    return T.clone();
  }
  if (isQuantizedElemKind(srcK)) {
    return quantization::dequantizeTensor(T, ElemKind::FloatTy);
  }
  return T.getCopyConvertedToType(ElemKind::FloatTy);
}

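/// Compares the results of running \p createAndInitFunction on the Interpreter
/// in \p interpElemKind precision against the backend \p backendName in
/// \p backendElemKind precision, allowing the results to differ by up to
/// \p allowedError. A sketch of a typical call, assuming the trailing
/// parameters keep the default values declared in BackendTestUtils.h:
///   compareAgainstInterpreter("CPU", createAndInitBasicFCNet,
///                             ElemKind::FloatTy, ElemKind::Int8QTy, 0.05f);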
void compareAgainstInterpreter(
    llvm::StringRef backendName, CreateAndInitFunction createAndInitFunction,
    ElemKind interpElemKind, ElemKind backendElemKind, float allowedError,
    unsigned count, bool convertToRowwiseQuantization,
    quantization::Schema schema, ElemKind biasElemKind, bool forceFP16AccumSLS,
    PrecisionConfiguration::Float16Format float16Format,
    bool convertToChannelwiseQuantization, bool skipQuantizeFCBias) {
  // Note: deviceMemory = 0 is a signal to use the defaultMemory.
  ExecutionEngine IEE{"Interpreter", /* deviceMemory */ 0,
                      /* ignoreUserDeviceConfig */ true};
  ExecutionEngine BEE{backendName};
  PlaceholderBindings iBindings, bBindings;

  LOG(INFO) << "Comparing Interpreter with precision "
            << Type::getElementName(interpElemKind).str() << " against "
            << backendName.str() << " with precision "
            << Type::getElementName(backendElemKind).str() << " with Bias "
            << (skipQuantizeFCBias ? "unquantized"
                                   : Type::getElementName(biasElemKind).str())
            << " with FP16 AccumulationSLS " << forceFP16AccumSLS;

  // Create the same network on the interpreter and the backend being tested.
  FunctionTensorPair IFT = createAndInitFunction(iBindings, IEE);
  FunctionTensorPair BFT = createAndInitFunction(bBindings, BEE);

  Function *IF = IFT.first;

  // Set up the configs for the interpreter and backend. If one or both
  // Functions will be quantized, then profile the graph on the interpreter and
  // quantize the Functions as requested.
  LoweredInfoMap ILIM, BLIM;
  auto configs = setupInterpAndBackendConfigs(
      IF, IEE, iBindings, ILIM, bBindings, BLIM, interpElemKind,
      backendElemKind, schema, convertToRowwiseQuantization,
      createAndInitFunction, biasElemKind, forceFP16AccumSLS, float16Format,
      count, convertToChannelwiseQuantization, skipQuantizeFCBias);
  CompilationContext &cctxI = configs.first;
  CompilationContext &cctxB = configs.second;

  // Skip conversion for rowwise quantized tests as they are a special case
  // which doesn't fit cleanly here -- e.g. RWQ-SLS has FloatTy outputs.
  if (!convertToRowwiseQuantization) {
    // We want to compare the ops themselves and not see differences in
    // conversion, so fold ElemKind conversion nodes into IO.
    cctxI.optimizationOpts.foldElemKindConversionIntoIO = true;
    cctxB.optimizationOpts.foldElemKindConversionIntoIO = true;
  }

  // Clone the Function inside itself many times if desired.
  std::unordered_set<Tensor *> resultTensors =
      cloneFunInsideFun(BFT, &bBindings, cctxB, count);
  assert(resultTensors.size() == count &&
         "Should get the same number of Tensors back as count.");

  IEE.compile(cctxI);
  BEE.compile(cctxB);

  // Again skip rowwise quantization as before.
  if (!convertToRowwiseQuantization) {
    // Now that we have compiled, precision transformation has occurred. Now
    // convert all mismatches for Placeholders given their original bindings.
    convertBindingsToCorrectType(IEE.getSingleFunctionFromModule(), iBindings);
    convertBindingsToCorrectType(BEE.getSingleFunctionFromModule(), bBindings);
  }

  IEE.run(iBindings);
  BEE.run(bBindings);

  // Compare each of our result tensors to the original. Always convert back to
  // float if necessary, as allowedError is expected to compare floats.
  Tensor finalIT = convertToFloatIfNecessary(*IFT.second);
  for (Tensor *T : resultTensors) {
    Tensor finalBT = convertToFloatIfNecessary(*T);
    EXPECT_TRUE(finalIT.isEqual(finalBT, allowedError, /* verbose */ true));
  }

  // Additionally check that each of the results from the parallel cloned
  // Functions are bitwise equal.
  auto it = resultTensors.begin();
  Tensor *firstResult = *it;
  for (it++; it != resultTensors.end(); it++) {
    EXPECT_TRUE(firstResult->isBitwiseEqual(**it));
  }
}

std::unordered_set<Tensor *> cloneFunInsideFun(FunctionTensorPair FTP,
                                               PlaceholderBindings *bindings,
                                               CompilationContext &cctx,
                                               unsigned count) {
  Function *origF = FTP.first;

  // Always save the original Function's Tensor, which we will keep around.
  std::unordered_set<Tensor *> resultTensors;
  resultTensors.insert(FTP.second);

  // Nothing to do if we just want the one.
  if (count == 1) {
    return resultTensors;
  }

  Module *mod = origF->getParent();

  // Clone the original Function to repeatedly add it to the original.
  auto *cloneF = origF->clone("single_clone");

  // We keep the original Function, then clone/add count-1 more.
  for (size_t i = 1; i < count; i++) {
    // Clone the clone, and then add all the new nodes to the original
    // function.
    auto *tmpF = cloneF->clone("tmp" + std::to_string(i));
    std::unordered_set<Node *> clonedNodes;
    bool foundSaveNode = false;
    for (auto &N : tmpF->getNodes()) {
      clonedNodes.insert(&N);

      // For every Node we add, check if it uses a Placeholder node, and if so
      // clone it in the Module so that CSE doesn't undo all our hard work.
      for (size_t j = 0, f = N.getNumInputs(); j < f; j++) {
        Placeholder *origPH = llvm::dyn_cast<Placeholder>(N.getNthInput(j));
        if (!origPH) {
          continue;
        }

        // Clone the Placeholder, allocate it in the bindings, and replace the
        // usage of the original node to point to the clone.
        Placeholder *clonePH = mod->createPlaceholder(
            origPH->getType(), origPH->getName(), origPH->isTraining());
        Tensor *oldT = bindings->get(origPH);
        assert(oldT);
        Tensor *newT = bindings->allocate(clonePH);
        newT->assign(oldT);
        N.setNthInput(j, clonePH);

        // Save the result Tensors to return so we can compare the results of
        // all of our clones.
        if (llvm::isa<SaveNode>(N)) {
          assert(!foundSaveNode &&
                 "Can only handle Functions with a single SaveNode.");
          foundSaveNode = true;
          resultTensors.insert(newT);
        }
      }
    }
    for (auto &N : clonedNodes) {
      origF->takeOwnershipOfNode(N);
    }
    mod->eraseFunction(tmpF);
  }
  // Now erase the clone we used to copy in, as it's no longer needed.
  mod->eraseFunction(cloneF);

  // Finally, duplicate all of the node profiling infos with the new expected
  // clone's name so that the cloned copies will find the same profiling info
  // as the original node if being quantized.
  auto &origInfos = cctx.precisionConfig.quantConfig.infos;
  origInfos.reserve(count * origInfos.size());
  std::vector<NodeProfilingInfo> newInfos;
  newInfos.reserve((count - 1) * origInfos.size());
  for (const auto &PI : origInfos) {
    const size_t colonIdx = PI.nodeOutputName_.find(":");
    assert(colonIdx != std::string::npos && "Name should always contain ':'");
    for (size_t i = 1; i < count; i++) {
      std::string newName(PI.nodeOutputName_);
      // Cloned nodes end up with the original name plus the count number
      // appended to their name due to uniquing. Replicate the same thing.
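      // For example, clone i = 1 of a node output named "conv:0" gets its
      // profiling info duplicated under "conv1:0".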
      newName.insert(colonIdx, std::to_string(i));
      newInfos.emplace_back(newName, PI.tensorProfilingParams_);
    }
  }
  origInfos.insert(origInfos.end(), newInfos.begin(), newInfos.end());

  return resultTensors;
}

unsigned countNodeKind(Function *F, Kinded::Kind kind) {
  unsigned count = 0;
  for (auto &n : F->getNodes()) {
    if (n.getKind() == kind) {
      count++;
    }
  }
  return count;
}

void inferIntLookupTableNet(Tensor *input, Tensor *out,
                            llvm::ArrayRef<int8_t> table,
                            llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto outTy = mod.uniqueType(ElemKind::Int8QTy, {(dim_t)input->size()}, 3, 3);
  auto var = createQuantizedPlaceholder(mod, bindings, input,
                                        input->getType().getScale(),
                                        input->getType().getOffset(), "var");
  auto *lookupTable = F->createIntLookupTable("lookuptable", var, table, outTy);
  auto *result = F->createSave("ret", lookupTable);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);
  bindings.allocate(mod.getPlaceholders());

  updateInputPlaceholders(bindings, {var}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferConvNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out,
                  llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  Placeholder *inputP;
  Placeholder *filterP;
  Placeholder *biasP;
  Placeholder *outP;
  TypeRef OT;
  if (inputs->getType().isQuantizedType()) {
    auto &outType = out->getType();
    auto &inType = inputs->getType();
    auto &filterType = filter->getType();
    auto &biasType = bias->getType();
    inputP = createQuantizedPlaceholder(
        mod, bindings, inputs, inType.getScale(), inType.getOffset(), "inputP");
    filterP = createQuantizedPlaceholder(mod, bindings, filter,
                                         filterType.getScale(),
                                         filterType.getOffset(), "filterP");
    biasP = createQuantizedPlaceholder(mod, bindings, bias, biasType.getScale(),
                                       biasType.getOffset(), "biasP");
    outP = createQuantizedPlaceholder(mod, bindings, out, outType.getScale(),
                                      outType.getOffset(), "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims(),
                                    outType.getScale(), outType.getOffset());
  } else {
    inputP = createPlaceholder(mod, bindings, inputs, "inputP");
    filterP = createPlaceholder(mod, bindings, filter, "filterP");
    biasP = createPlaceholder(mod, bindings, bias, "biasP");
    outP = createPlaceholder(mod, bindings, out, "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims());
  }
  auto *conv = F->createConv("conv", inputP, filterP, biasP, OT, 5, 3, 4, 1);
  auto *result = F->createSave("ret", conv, outP);
  auto *resultTensor = bindings.get(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {inputP, filterP, biasP},
                          {inputs, filter, bias});
  EE.run(bindings);
  out->assign(resultTensor);
}

void trainConvNet(Tensor *inputs, Tensor *kernel1, Tensor *bias1,
                  Tensor *kernel2, Tensor *bias2, Tensor *selected,
                  llvm::ArrayRef<dim_t> shape1, llvm::ArrayRef<dim_t> shape2,
                  Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.01;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *conv1 = F->createConv(bindings, "conv1", var1, 3, {5, 3}, {2, 1},
                                {2, 1, 2, 1}, 1);
    bindings.get(cast<Placeholder>(conv1->getFilter()))->assign(kernel1);
    bindings.get(cast<Placeholder>(conv1->getBias()))->assign(bias1);
    auto *reshape1 = F->createReshape("reshape1", conv1, shape1);
    auto *conv2 = F->createConv(bindings, "conv2", reshape1, 2, 2, 2, 0, 1);
    bindings.get(cast<Placeholder>(conv2->getFilter()))->assign(kernel2);
    bindings.get(cast<Placeholder>(conv2->getBias()))->assign(bias2);
    auto *reshape2 = F->createReshape("reshape2", conv2, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }

  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}

void inferLocalResponseNormalizationNet(Tensor *inputs, Tensor *out,
                                        llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var");
  auto *lrn = F->createLocalResponseNormalization("lrn", var, 5, 3.0, 0.5, 1.5);
  auto *result = F->createSave("ret", lrn);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

void trainLocalResponseNormalizationNet(Tensor *inputs, Tensor *weights,
                                        Tensor *bias, Tensor *selected,
                                        llvm::ArrayRef<dim_t> shape1,
                                        llvm::ArrayRef<dim_t> shape2,
                                        Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings, trainingBindings;
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines{&EEI, &EET};
  TrainingConfig TC;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.06;
  TC.momentum = 0.1;
  TC.L2Decay = 0.01;
  Placeholder *var1, *var2;
  std::string fName;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    Function *F = mod.createFunction("main");
    fName = F->getName();
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *lrn =
        F->createLocalResponseNormalization("lrn", reshape1, 2, 2.0, 0.5, 1.0);
    auto *reshape2 = F->createReshape("reshape2", lrn, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    auto *result = F->createSave("ret", softmax);
    bindings.allocate(result->getPlaceholder());
  }
  auto *TF = glow::differentiate(EET.getModule().getFunction(fName), TC);
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  bindings.clear();
  bindings.allocate(EEI.getModule().getPlaceholders());

  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(bindings);
  var1 = bindings.getPlaceholderByNameSlow("var1");
  var2 = bindings.getPlaceholderByNameSlow("var2");
  EEI.compile(CompilationMode::Infer);

  runBatch(EEI, bindings, 1, sampleCounter, {var1, var2}, {inputs, selected});
  out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret")));
}

void trainAvgPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines{&EEI, &EET};
  TrainingConfig TC;
  PlaceholderBindings bindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.01;
  TC.momentum = 0.4;
  TC.L2Decay = 0.01;
  Placeholder *var1, *var2;
  std::string fName;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    Function *F = mod.createFunction("main");
    fName = F->getName();
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *pool = F->createAvgPool("pool", reshape1, 2, 2, 0);
    auto *reshape2 = F->createReshape("reshape2", pool, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    auto *result = F->createSave("ret", softmax);
    bindings.allocate(result->getPlaceholder());
  }
  auto *TF = glow::differentiate(EET.getModule().getFunction("main"), TC);
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  bindings.clear();
  bindings.allocate(EEI.getModule().getPlaceholders());

  runBatch(EET, trainingBindings, 10, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(bindings);
  var1 = bindings.getPlaceholderByNameSlow("var1");
  var2 = bindings.getPlaceholderByNameSlow("var2");
  EEI.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2}, {inputs, selected});
  EEI.run(bindings);
  out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret")));
}

void trainMaxPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.003;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    bindings.clear();
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *pool = F->createMaxPool("pool", reshape1, 5, 3, 4);
    auto *reshape2 = F->createReshape("reshape2", pool->getResult(), shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }
  auto *TF = glow::differentiate(F, TC);
  auto fName = F->getName();
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  runBatch(EET, trainingBindings, 7, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  runBatch(EEI, inferBindings, 1, sampleCounter, {var1, var2},
           {inputs, selected}, fName);
  out->assign(res);
}

void inferSmallConv(Tensor *inputs, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");
  auto *in = createPlaceholder(mod, bindings, inputs, "in", "NHWC");
  auto *C = F->createConv(bindings, "conv2a", in, 64, 1, 1, 0, 1);
  bindings.get(cast<Placeholder>(C->getFilter()))->getHandle().clear(0.3);
  bindings.get(cast<Placeholder>(C->getBias()))->getHandle().clear(0.4);
  auto *result = F->createSave("ret", C);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());
  convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {in}, {inputs});
  EE.run(bindings);

  out->assign(resultTensor);
}

void inferGroupConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

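  // With group = 2 in the createConv call below, the 32 input channels are
  // split into two groups of 16, so each of the 128 filters spans only 16
  // input channels.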
  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 16},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 16; j++) {
      FH.at({i, 0, 0, j}) = (i + j) / 100.0;
    }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();

  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 1, 128});

  ConvolutionNode *CN =
      F->createConv("Conv", input, filter, zeroBias, outTy, 1, 1, 0, 2);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquarePaddingConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 32; j++) {
      FH.at({i, 0, 0, j}) = (i + j) / 100.0;
    }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 4, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {1, 1}, {1, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquareKernelConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 2; j++)
      for (dim_t k = 0; k < 32; k++) {
        FH.at({i, j, 0, k}) = (i + j + k) / 100.0;
      }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 3, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {2, 1}, {1, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquareStrideConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 2; j++)
      for (dim_t k = 0; k < 32; k++) {
        FH.at({i, j, 0, k}) = (i + j + k) / 100.0;
      }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {2, 1}, {2, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferConvDKKC8(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {3, 3, 3, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 3 * 3 * 3 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {192, 3, 3, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  filterTensor->zero();
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 192; i++)
    for (dim_t j = 0; j < 3; j++)
      for (dim_t k = 0; k < 3; k++)
        for (dim_t l = 0; l < 32; l++) {
          FH.at({i, j, k, k}) = (i + j + k + l) / 200.0;
        }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {192}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {3, 3, 3, 192});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {3, 3}, {1, 1}, {1, 1, 1, 1}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void trainSoftMaxNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EEI(kind);
  ExecutionEngine EET(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.003;
  TC.momentum = 0.7;
  TC.L2Decay = 0.001;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *softmax = F->createSoftMax("softmax", fc, var2);
    F->createSave("ret", softmax);
  }

  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();

  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  runBatch(EET, trainingBindings, 30, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  EEI.compile(CompilationMode::Infer);
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}

void inferTanhConcatNet(Tensor *input1, Tensor *input2, Tensor *input3,
                        Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var1 = createPlaceholder(mod, bindings, input1, "var1");
  auto *var2 = createPlaceholder(mod, bindings, input2, "var2");
  auto *var3 = createPlaceholder(mod, bindings, input3, "var3");
  auto *T1 = F->createTanh("tanh1", var1);
  auto *T2 = F->createTanh("tanh2", var2);
  auto *T3 = F->createTanh("tanh3", var3);
  Node *C1 = F->createConcat("concat", {T1, T2}, 0);
  Node *C2 = F->createConcat("concat", {T2, T3, C1, T2}, 0);
  auto *result = F->createSave("ret", C2);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2, var3},
                          {input1, input2, input3});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferBasicConvNet(Tensor *inputs, Tensor *out, llvm::StringRef kind,
                       size_t convDepth) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var", "NCHW");
  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *conv = F->createConv(bindings, "conv", tr, convDepth, {5, 5}, {2, 2},
                             {1, 1, 1, 1}, 1);
  bindings.get(cast<Placeholder>(conv->getFilter()))->getHandle().clear(0.1);
  bindings.get(cast<Placeholder>(conv->getBias()))->getHandle().clear(0.2);
  auto *pool = F->createMaxPool("pool", conv, 2, 2, 0);
  auto *result = F->createSave("ret", pool->getResult());
  auto *resultTensor = bindings.allocate(result->getPlaceholder());
  convertPlaceholdersToConstants(F, bindings, {var, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

FunctionTensorPair createAndInitBasicFCNet(PlaceholderBindings &bindings,
                                           ExecutionEngine &EE) {
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto *var = mod.createPlaceholder(ElemKind::FloatTy, {2, 3, 16, 16}, "var",
                                    false, "NCHW");
  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *fc = F->createFullyConnected(bindings, "fc", tr, 16);
  auto *rl0 = F->createRELU("relu", fc);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", rl0, 8);
  auto *rl1 = F->createRELU("relu", fc2);
  bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.8);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.5);
  auto *result = F->createSave("ret", rl1);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  PseudoRNG PRNG;
  bindings.allocate(var)->getHandle().initXavier(1, PRNG);

  return std::make_pair(F, resultTensor);
}

void inferMixedNet(Tensor *inputs, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var", "NCHW");
  auto *selected =
      mod.createPlaceholder(ElemKind::Int64ITy, {2, 1}, "selected", false);

  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *fc = F->createFullyConnected(bindings, "fc", tr, 16);
  auto *th0 = F->createTanh("tanh", fc);
  auto *sg0 = F->createSigmoid("sig", fc);
  auto *A1 = F->createAdd("add", th0, sg0);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", A1, 16);

  auto *R = F->createRegression("reg", fc2, fc2);
  auto *SM = F->createSoftMax("SM", R, selected);
  auto *result = F->createSave("ret", SM);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.4);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(3.5);

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferComplexNet1(Tensor *inputs1, Tensor *inputs2, Tensor *inputs3,
                      Tensor *inputs4, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var1 = createPlaceholder(mod, bindings, inputs1, "var1");
  auto *var2 = createPlaceholder(mod, bindings, inputs2, "var2");
  auto *var3 = createPlaceholder(mod, bindings, inputs3, "var3");
  auto *var4 = createPlaceholder(mod, bindings, inputs4, "var4");
  auto *conv1 = F->createConv(bindings, "conv1", var1, 6, 4, 1, 2, 1);
  bindings.get(cast<Placeholder>(conv1->getFilter()))->getHandle().clear(0.5);
  bindings.get(cast<Placeholder>(conv1->getBias()))->getHandle().clear(0.7);
  auto *sigmoid1 = F->createSigmoid("sigmoid1", conv1);
  auto *fc1 = F->createFullyConnected(bindings, "fc1", var2, 2352);
  bindings.get(cast<Placeholder>(fc1->getWeights()))->getHandle().clear(0.6);
  auto *reshape1 = F->createReshape("reshape1", fc1, {8, 14, 28, 6}, "NHWC");
  auto *relu1 = F->createRELU("relu1", reshape1);
  auto *pool1 = F->createMaxPool("pool1", relu1, 2, 2, 1);
  auto *add = F->createAdd("add", sigmoid1, pool1->getResult());
  auto *tanh = F->createTanh("tanh", add);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", var3, 720);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.1);
  auto *reshape2 = F->createReshape("reshape2", fc2, {8, 8, 15, 6}, "NHWC");
  auto *mul = F->createMul("mul", tanh, reshape2);
  auto *sigmoid2 = F->createSigmoid("sigmoid2", mul);
  auto *conv2 = F->createConv(bindings, "conv2", sigmoid2, 7, 3, 2, 1, 1);
  bindings.get(cast<Placeholder>(conv2->getFilter()))->getHandle().clear(0.3);
  bindings.get(cast<Placeholder>(conv2->getBias()))->getHandle().clear(1.3);
  auto *reshape3 = F->createReshape("reshape3", conv2, {8, 8, 7, 4}, "NHWC");
  auto *sub = F->createSub("sub", reshape3, var4);
  auto *relu2 = F->createRELU("relu2", sub);
  auto *pool2 = F->createAvgPool("pool2", relu2, 3, 2, 1);
  auto *sigmoid3 = F->createSigmoid("sigmoid3", pool2);
  auto *result = F->createSave("ret", sigmoid3);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2, var3, var4},
                          {inputs1, inputs2, inputs3, inputs4});
  EE.run(bindings);
  out->assign(resultTensor);
}

namespace {
// Helper for initializing conv node filter/bias from input tensors.
static void initConv(PlaceholderBindings &bindings, ConvolutionNode *C,
                     Tensor &filter, Tensor &bias) {
  bindings.get(cast<Placeholder>(C->getFilter()))->assign(&filter);
  bindings.get(cast<Placeholder>(C->getBias()))->assign(&bias);
}
} // namespace

void inferTinyResnet(Tensor *input, Tensor *out, std::vector<Tensor> &weights,
                     llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *in = createPlaceholder(mod, bindings, input, "in", "NHWC");
  auto *conv1 = F->createConv(bindings, "conv1", in, 256, 1, 1, 0, 1);
  auto *conv2a = F->createConv(bindings, "conv2a", conv1, 64, 1, 1, 0, 1);
  auto *relu2a = F->createRELU("relu2a", conv2a);
  auto *conv2b = F->createConv(bindings, "conv2b", relu2a, 64, 3, 1, 1, 1);
  auto *relu2b = F->createRELU("relu2b", conv2b);
  auto *conv2c = F->createConv(bindings, "conv2c", relu2b, 256, 1, 1, 0, 1);
  auto *add = F->createAdd("add", conv2c, conv1);
  auto *relu = F->createRELU("res2a_relu", add);
  auto *result = F->createSave("ret", relu);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  initConv(bindings, conv1, weights[0], weights[1]);
  initConv(bindings, conv2a, weights[2], weights[3]);
  initConv(bindings, conv2b, weights[4], weights[5]);
  initConv(bindings, conv2c, weights[6], weights[7]);
  convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {in}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferExtract3D(Tensor *input, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *inputs = createPlaceholder(mod, bindings, input, "inputs");

  auto *x1 = F->createSlice("ex1", inputs, {0, 5, 0}, {1, 100, 100});
  auto *x2 = F->createSlice("ex2", inputs, {1, 5, 0}, {2, 100, 100});
  auto *x3 = F->createSlice("ex3", inputs, {2, 5, 0}, {3, 100, 100});
  auto *x4 = F->createSlice("ex4", inputs, {3, 5, 0}, {4, 100, 100});

  auto *x12 = F->createConcat("x12", {x1, x2}, 1);
  auto *x34 = F->createConcat("x34", {x3, x4}, 1);
  auto *x13 = F->createConcat("x13", {x1, x3}, 1);
  auto *x24 = F->createConcat("x24", {x2, x4}, 1);

  auto *add1 = F->createAdd("add1", x12, x34);
  auto *add2 = F->createAdd("add2", x13, x24);
  auto *add3 = F->createAdd("add3", add1, add2);

  auto *e = F->createSlice("slice", add3, {0, 55, 50}, {1, 150, 100});
  auto *result = F->createSave("ret", e);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {inputs}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferMaxSplat(Tensor *input, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto T = mod.uniqueType(ElemKind::Int8QTy, input->getType().dims(),
                          2 * input->getType().getScale(),
                          -input->getType().getOffset());
  auto *var = createQuantizedPlaceholder(mod, bindings, input,
                                         input->getType().getScale(),
                                         input->getType().getOffset(), "var");
  auto *rescale = F->createRescaleQuantized("rescale", var, T);

  auto *splat1 = F->createSplat("splat1", T, 0.0);
  auto *splat2 = F->createSplat("splat2", T, 5.0);

  auto *max1 = F->createMax("max1", rescale, splat1);
  auto *max2 = F->createMax("max2", splat2, max1);

  auto *result = F->createSave("ret", max2);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

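/// Adds the compiled function \p func to \p device under \p name for module
/// \p mod, and blocks until the add completes.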
void insertCompiledFunction(llvm::StringRef name, CompiledFunction *func,
                            runtime::DeviceManager *device, Module *mod) {
  runtime::FunctionMapTy functionMap;
  functionMap[name] = func;

  std::promise<void> addPromise;
  auto fut = addPromise.get_future();
  Error addErr = Error::empty();
  device->addNetwork(mod, std::move(functionMap),
                     [&addPromise, &addErr](const Module *, Error err) {
                       addErr = std::move(err);
                       addPromise.set_value();
                     });
  fut.wait();
  EXIT_ON_ERR(std::move(addErr));
}

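/// Runs the function \p name on \p device using \p context, and blocks until
/// the run completes. \p context is passed in by reference, so it is released
/// from the temporary unique_ptr rather than freed.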
void runOnDevice(ExecutionContext &context, llvm::StringRef name,
                 runtime::DeviceManager *device) {
  std::unique_ptr<ExecutionContext> contextPtr(&context);
  std::promise<void> runPromise;
  auto fut = runPromise.get_future();
  Error runErr = Error::empty();
  device->runFunction(
      name, std::move(contextPtr),
      [&runPromise, &runErr](runtime::RunIdentifierTy, Error err,
                             std::unique_ptr<ExecutionContext> contextPtr) {
        // Don't delete context.
        contextPtr.release();
        runErr = std::move(err);
        runPromise.set_value();
      });
  fut.wait();
  EXIT_ON_ERR(std::move(runErr));
}

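/// \returns a Constant named \p name with shape \p dims and the element type
/// of \p type, filled with random values appropriate for that element kind.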
Constant *createRandomizedConstant(Module &mod, TypeRef type,
                                   llvm::ArrayRef<dim_t> dims,
                                   llvm::StringRef name) {
  auto *c = mod.createConstant(mod.uniqueTypeWithNewShape(type, dims), name);

  switch (type->getElementType()) {
  case ElemKind::FloatTy: {
    c->getHandle<float>().initXavier(c->getType()->size() * 2, mod.getPRNG());
    break;
  }
  case ElemKind::Float16Ty: {
    c->getHandle<float16_t>().initXavier(c->getType()->size() * 2,
                                         mod.getPRNG());
    break;
  }
  case ElemKind::BFloat16Ty: {
    c->getHandle<bfloat16_t>().initXavier(c->getType()->size() * 2,
                                          mod.getPRNG());
    break;
  }
  case ElemKind::Int32QTy: {
    c->getHandle<int32_t>().randomize(INT32_MIN, INT32_MAX, mod.getPRNG());
    break;
  }
  case ElemKind::Int8QTy: {
    c->getHandle<int8_t>().randomize(INT8_MIN, INT8_MAX, mod.getPRNG());
    break;
  }
  case ElemKind::UInt8FusedQTy:
  case ElemKind::UInt8FusedFP16QTy: {
    c->getHandle<uint8_t>().randomize(UINT8_MIN, UINT8_MAX, mod.getPRNG());
    break;
  }
  default:
    LOG(FATAL) << "Unsupported type: " << type->getElementName().str();
  }

  return c;
}

Constant *createRandomFusedRowwiseQuantizedConstant(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16) {
  auto T = mod.uniqueType(
      (useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy),
      {1}, 1, 0);
  const dim_t sizeScaleOffset =
      useFusedFP16 ? sizeof(float16_t) : sizeof(float);
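  // Fused rowwise-quantized tensors append a per-row scale and offset to each
  // row, hence the extra 2 * sizeScaleOffset columns.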
  Constant *c = createRandomizedConstant(
      mod, T, {dims[0], dims[1] + 2 * sizeScaleOffset}, name);

  // Range (0, 255) -> (-0.1, 0.1)
  constexpr float scale = 1.0f / 1275;
  constexpr float offset = -0.1;
  auto cH = c->getPayload().getHandle<uint8_t>();
  for (unsigned i = 0, e = c->dims()[0]; i < e; i++) {
    if (useFusedFP16) {
      cH.setFusedScaleOffsetInRow<float16_t>(i, scale, offset);
    } else {
      cH.setFusedScaleOffsetInRow<float>(i, scale, offset);
    }
  }

  return c;
}

Placeholder *createFusedRowwiseQuantizedPlaceholder(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16) {
  auto T = useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy;
  const dim_t sizeScaleOffset =
      useFusedFP16 ? sizeof(float16_t) : sizeof(float);
  constexpr float scale = 1.0f / 1275;
  constexpr float offset = -0.1;
  Placeholder *ph = mod.createPlaceholder(
      T, {dims[0], dims[1] + 2 * sizeScaleOffset}, scale, offset, name, false);

  return ph;
}
} // namespace glow