1 // Copyright (C) 2020 by Yuri Victorovich. All rights reserved.
2 
3 #include "compute.h"
4 #include "plugin-interface.h"
5 #include "nn-types.h"
6 #include "tensor.h"
7 #include "nn-operators.h"
8 #include "image.h"
9 #include "misc.h"
10 #include "util.h"
11 
12 #include <string>
13 #include <vector>
14 #include <array>
15 #include <memory>
16 #include <functional>
17 #include <cmath>
18 #include <cstring>
19 
20 #include <assert.h>
21 
22 #if defined(DEBUG)
23 #define PRINT_OPTS(opts...) PRINT(opts)
24 #else
25 #define PRINT_OPTS(opts...)
26 #endif
27 
28 namespace Compute {
29 
30 typedef PluginInterface PI;
31 
32 //
33 // local helpers
34 //
35 
36 class OperatorOptions {
37 public:
38 	template<PI::OperatorOptionName Option, PI::OperatorOptionType OType, typename CType>
GetOption1(const PI::OperatorOptionsList & opts,CType * val1)39 	static bool GetOption1(const PI::OperatorOptionsList &opts, CType *val1) {
40 		for (auto &o : opts)
41 			if (o.name == Option) {
42 				assert(o.value.type == OType);
43 				*val1 = o.value.as<CType>();
44 				return true; // found
45 			}
46 		return false; // not found
47 	}
48 };
49 
50 // helper for operators Concatenate and Split
51 template<typename OneFloat, typename ManyFloat>
CopyTensorSlices(const PI::Model * model,PI::TensorId one,const std::vector<PI::TensorId> & many,OneFloat * oneTensorData,std::shared_ptr<ManyFloat> * manyTensorData,int axis,std::function<void (OneFloat * & one,ManyFloat * & split,unsigned num)> fnCopy)52 void CopyTensorSlices(
53 	const PI::Model *model
54 	, PI::TensorId one
55 	, const std::vector<PI::TensorId> &many
56 	, OneFloat *oneTensorData
57 	, std::shared_ptr<ManyFloat> *manyTensorData
58 	, int axis
59 	, std::function<void(OneFloat* &one, ManyFloat* &split, unsigned num)> fnCopy)
60 {
61 	// compute inside and outside tensor sizes
62 	TensorShape oneShape = model->getTensorShape(one);
63 	unsigned outsideTensorSize = Tensor::sizeBetweenDims(oneShape, 0, axis-1);
64 	unsigned insideTensorSize  = Tensor::sizeBetweenDims(oneShape, axis+1, oneShape.size()-1);
65 
66 	// create output data
67 	unsigned manySize = many.size();
68 	ManyFloat* manyDataPtr[manySize];
69 	unsigned outputSliceSize[manySize];
70 	for (unsigned o = 0; o < manySize; o++) {
71 		TensorShape manyShape = model->getTensorShape(many[o]);
72 		outputSliceSize[o] = manyShape[axis]*insideTensorSize;
73 		manyDataPtr[o] = manyTensorData[o].get();
74 	}
75 
76 	OneFloat *oneDataPtr0 = oneTensorData, *oneDataPtr = oneDataPtr0;
77 	for (unsigned io = 0; io < outsideTensorSize; io++)
78 		for (unsigned o = 0; o < manySize; o++)
79 			fnCopy(oneDataPtr, manyDataPtr[o], outputSliceSize[o]);
80 	assert(oneDataPtr == oneDataPtr0+Tensor::flatSize(oneShape));
81 }
82 
83 //
84 // exported functions
85 //
86 
// Builds the model's input tensors before a computation run.
//
// For every input tensor of the model two sources are tried, in order:
//  1. a JSON file named "tensor#<id>.json" containing data of the required shape,
//  2. the supplied image (used for at most one input), which is cropped to
//     imageRegion, resized to the model's required shape, and normalized to the
//     requested value range and color order.
//
// Arguments:
//   model              - model whose inputs need to be filled
//   imageRegion        - image region to use; compared below as {x1,y1,x2,y2}
//                        (inclusive) against W/H of inputShape - TODO confirm
//   inputNormalization - (value range, color order) the model expects
//   inputTensor        - the source image data, laid out per inputShape
//   inputShape         - shape of the source image; must be rank-3 [H,W,C]
//   inputs             - (output) receives the data of each model input tensor
//   cbTensorComputed   - invoked for each input tensor successfully prepared
//   cbWarningMessage   - invoked with an explanation when preparation fails
//
// Returns true when all model inputs were filled, false otherwise.
bool buildComputeInputs(
	const PI::Model *model,
	std::array<unsigned,4> imageRegion,
	std::tuple<InputNormalizationRange,InputNormalizationColorOrder> inputNormalization,
	std::shared_ptr<float> &inputTensor, const TensorShape &inputShape,
	std::map<PI::TensorId, std::shared_ptr<const float>> &inputs, // output the set of inputs
	std::function<void(PI::TensorId)> cbTensorComputed,
	std::function<void(const std::string&)> cbWarningMessage)
{
	assert(inputShape.size()==3); // the image is expected as [H,W,C]

	/// find the model's input

	auto modelInputs = model->getInputs();

	// input tensor is either reused, or reallocated when alterations are needed
	auto convertInputImage = [&](PI::TensorId tensorId, TensorShape requiredShape, std::shared_ptr<const float> &inputImage) {
		inputImage = inputTensor; // initially assign with inputShape, but replace later with a newly allocated one if any transformations are performed
		float *inputAllocated = nullptr; // keep track of new allocations
		TensorShape myInputShape = inputShape;

		/// extract the region if required

		// crop unless the region starts at (0,0) and spans the whole image
		// (myInputShape[0] is H, myInputShape[1] is W)
		if (imageRegion[0]!=0 || imageRegion[1]!=0 || imageRegion[2]+1!=myInputShape[1] || imageRegion[3]+1!=myInputShape[0]) {
			// NOTE(review): the buffer is handed to shared_ptr<const float>::reset() with the
			// default deleter (delete, not delete[]); if Image::regionOfImage allocates with
			// new[] (as 'new float[...]' below certainly does) this is a mismatch - verify
			// and consider supplying an array deleter here and in the resets below
			inputImage.reset((inputAllocated = Image::regionOfImage(inputImage.get(), myInputShape, imageRegion)));
			myInputShape = {imageRegion[3]-imageRegion[1]+1, imageRegion[2]-imageRegion[0]+1, myInputShape[2]};
		}

		/// resize the source image

		{
			// adjust the required shape to the form [H,W,C]
			if (requiredShape.size() == 4) { // assume [B,H,W,C]
				if (requiredShape[0] != 1) {
					cbWarningMessage(STR("Model's required shape " << requiredShape << " has 4 elements but doesn't begin with B=1,"
					                     " don't know how to adjust the image for it"));
					return false;
				}
				requiredShape = Tensor::getLastDims(requiredShape, 3); // drop the B=1 dimension
			} else if (requiredShape.size() == 3) {
				if (requiredShape[0] == 1) { // assume [B=1,H,W], remove B and add C=1 for monochrome image
					requiredShape = Tensor::getLastDims(requiredShape, 2);
					requiredShape.push_back(1);
				} else { // see if the shape is image-like
					if (requiredShape[2]!=1 && requiredShape[2]!=3) { // expect C=1 or C=3, otherwise we can't handle it
						cbWarningMessage(STR("Model's required shape " << requiredShape << " has 3 elements but has C=1 or C=3,"
						                     " it doesn't look like it describes an image,"
						                     " don't know how to adjust the image for it"));
						return false;
					}
				}
			} else {
				cbWarningMessage(STR("Model's required shape " << requiredShape << " isn't standard, don't know how to adjust the image for it"));
				return false;
			}

			// now we have requiredShape=[H,W,C], resize the image if needed
			if (myInputShape != requiredShape)
				inputImage.reset((inputAllocated = Image::resizeImage(inputImage.get(), myInputShape, requiredShape)));
		}

		/// normalize input

		if (inputNormalization != InputNormalization{InputNormalizationRange_0_255,InputNormalizationColorOrder_RGB}) { // 0..255/RGB is how images are imported from files
			auto inputTensorSize = Tensor::flatSize(requiredShape);

			const float *src = inputImage.get();
			if (!inputAllocated) // need to allocate because we change the data, otherwise use the allocated above one
				inputImage.reset((inputAllocated = new float[inputTensorSize]));

			// helpers
			// linearly maps 0..255 pixel values into [min,max) using the scale (max-min)/256
			auto normalizeRange = [](const float *src, float *dst, size_t sz, float min, float max) {
				float m = (max-min)/256.; // XXX or 255.?
				for (auto srce = src+sz; src<srce; )
					*dst++ = min + (*src++)*m;
			};
			// subtracts the values of 'sub' cyclically, i.e. per-channel mean subtraction
			auto normalizeSub = [](const float *src, float *dst, size_t sz, const std::vector<float> &sub) {
				unsigned i = 0;
				for (auto srce = src+sz; src<srce; ) {
					*dst++ = *src++ - sub[i];
					if (++i == sub.size())
						i = 0;
				}
			};
			// permutes every consecutive group of permutation.size() values (e.g. RGB->BGR);
			// safe when src==dst because each group is staged through 'tmp' first
			auto reorderArrays = [](const float *src, float *dst, size_t sz, const std::vector<unsigned> &permutation) {
				float tmp[permutation.size()]; // NOTE(review): VLA is a compiler extension, not standard C++
				for (auto srce = src+sz; src<srce; src+=permutation.size()) {
					float *ptmp = tmp;
					for (auto idx : permutation)
						*ptmp++ = src[idx];
					for (auto t : tmp)
						*dst++ = t;
				}
			};

			// normalize value range; after each conversion 'src' is redirected to the
			// converted buffer so the color-order pass below sees the normalized data
			switch (std::get<0>(inputNormalization)) {
			case InputNormalizationRange_0_1:
				normalizeRange(src, inputAllocated, inputTensorSize, 0, 1);
				src = inputAllocated;
				break;
			case InputNormalizationRange_0_255:
				break; // already at 0..255
			case InputNormalizationRange_0_128:
				normalizeRange(src, inputAllocated, inputTensorSize, 0, 128);
				src = inputAllocated;
				break;
			case InputNormalizationRange_0_64:
				normalizeRange(src, inputAllocated, inputTensorSize, 0, 64);
				src = inputAllocated;
				break;
			case InputNormalizationRange_0_32:
				normalizeRange(src, inputAllocated, inputTensorSize, 0, 32);
				src = inputAllocated;
				break;
			case InputNormalizationRange_0_16:
				normalizeRange(src, inputAllocated, inputTensorSize, 0, 16);
				src = inputAllocated;
				break;
			case InputNormalizationRange_0_8:
				normalizeRange(src, inputAllocated, inputTensorSize, 0, 8);
				src = inputAllocated;
				break;
			case InputNormalizationRange_M1_P1:
				normalizeRange(src, inputAllocated, inputTensorSize, -1, 1);
				src = inputAllocated;
				break;
			case InputNormalizationRange_M05_P05:
				normalizeRange(src, inputAllocated, inputTensorSize, -0.5, 0.5);
				src = inputAllocated;
				break;
			case InputNormalizationRange_14_34: // "14_34" presumably means the 1/4..3/4 range - confirm
				normalizeRange(src, inputAllocated, inputTensorSize, 0.25, 0.75);
				src = inputAllocated;
				break;
			case InputNormalizationRange_ImageNet:
				assert(*requiredShape.rbegin()==3); // per-channel subtraction needs C=3
				// subtract the per-channel mean values commonly used for ImageNet-trained models
				normalizeSub(src, inputAllocated, inputTensorSize, {123.68, 116.78, 103.94});
				src = inputAllocated;
				break;
			}

			// normalize color order
			switch (std::get<1>(inputNormalization)) {
			case InputNormalizationColorOrder_RGB:
				break; // already RGB
			case InputNormalizationColorOrder_BGR:
				reorderArrays(src, inputAllocated, inputTensorSize, {2,1,0}); // swap R and B channels
				break;
			}
		}

		return true;
	};
	// tries to read the input tensor from a JSON file named after the tensor id
	auto convertInputFromJsonFile = [](PI::TensorId tensorId, const TensorShape &requiredShape, std::shared_ptr<const float> &inputTensor) {
		std::shared_ptr<const float> foundTensor;
		if (Tensor::readTensorDataAsJson(CSTR("tensor#" << tensorId << ".json"), requiredShape, foundTensor)) { // match the name with one in main-window.cpp
			inputTensor = foundTensor;
			return true;
		}

		return false; // failed to read the tensor with the requested shape
	};

	/// convert inputs

	bool imageImported = false; // the supplied image may be consumed by at most one input
	for (auto tensorId : modelInputs) {
		const auto &shape = model->getTensorShape(tensorId);

		// first, try the file
		if (convertInputFromJsonFile(tensorId, shape, inputs[tensorId])) {
			cbTensorComputed(tensorId); // notify the caller that the input tensor has been computed
			continue; // imported
		}
		// second, try the supplied image
		if (!imageImported && convertInputImage(tensorId, shape, inputs[tensorId])) {
			cbTensorComputed(tensorId); // notify the caller that the input tensor has been computed
			imageImported = true;
			continue; // imported
		}

		// failed to find data for the input tensor
		cbWarningMessage(STR("couldn't find input data for the tensor#" << tensorId << " with shape=" << shape));
		return false;
	}


	return true;
}
277 
fillInputs(std::map<PI::TensorId,std::shared_ptr<const float>> & inputs,std::unique_ptr<std::vector<std::shared_ptr<const float>>> & tensorData)278 void fillInputs(
279 	std::map<PI::TensorId, std::shared_ptr<const float>> &inputs,
280 	std::unique_ptr<std::vector<std::shared_ptr<const float>>> &tensorData)
281 {
282 	for (auto it : inputs)
283 		(*tensorData)[it.first] = it.second;
284 
285 }
286 
compute(const PI::Model * model,std::unique_ptr<std::vector<std::shared_ptr<const float>>> & tensorData,std::function<void (PI::TensorId)> cbTensorComputed,std::function<void (const std::string &)> cbWarningMessage)287 bool compute(
288 	const PI::Model *model,
289 	std::unique_ptr<std::vector<std::shared_ptr<const float>>> &tensorData,
290 	std::function<void(PI::TensorId)> cbTensorComputed,
291 	std::function<void(const std::string&)> cbWarningMessage)
292 {
293 	/// compute operators
294 
295 	for (PI::OperatorId oid = 0, oide = (PI::OperatorId)model->numOperators(); oid<oide; oid++) {
296 		// get operator's inputs/outputs
297 		std::vector<PI::TensorId> inputs, outputs;
298 		model->getOperatorIo(oid, inputs, outputs);
299 
300 		// get operator options from the model
301 		std::unique_ptr<PI::OperatorOptionsList> opts(model->getOperatorOptions(oid));
302 
303 		// helpers
304 		auto getTensorDataDynamicOrStatic = [model,&tensorData](PI::TensorId tensorId) -> const float* {
305 			auto &dynamic = (*tensorData)[tensorId];
306 			assert(dynamic || model->getTensorHasData(tensorId)); // at least one of dynamic and static should be available
307 			assert(!(dynamic && model->getTensorHasData(tensorId))); // both dynamic and static can't be available
308 			return dynamic ? dynamic.get() : model->getTensorDataF32(tensorId);
309 		};
310 		auto translatePadding = [](unsigned stride, unsigned dilationRate,
311 		                           WidthHeight wh, const TensorShape &inputShape, const TensorShape &filterShape, const TensorShape &outputShape) {
312 			//return filterShape[wh==WIDTH ? 2:1]/2;
313 			unsigned shapeIdx = wh==WIDTH ? 2:1;
314 			return std::get<0>(computePaddingValues(stride, dilationRate, inputShape[shapeIdx], filterShape[shapeIdx], outputShape[shapeIdx]));
315 		};
316 		auto computeSingleOperator = [&](float(*fn)(float f)) {
317 			assert(inputs.size()==1 && outputs.size()==1);
318 			assert(!opts || opts->empty()); // h-swish has no options
319 			assert((*tensorData)[inputs[0]]); // need to have the input data present
320 
321 			// tensors
322 			auto inputShape = model->getTensorShape(inputs[0]);
323 			auto outputShape = model->getTensorShape(outputs[0]);
324 			auto inputShapeSize = Tensor::flatSize(inputShape);
325 			assert(inputShape==outputShape);
326 			UNUSED(outputShape)
327 
328 			// create output data
329 			std::unique_ptr<float> outputData(new float[inputShapeSize]);
330 
331 			// compute
332 			auto input = (*tensorData)[inputs[0]].get();
333 			auto output = outputData.get();
334 			for (auto inpute = input+inputShapeSize; input<inpute; input++, output++)
335 				*output = fn(*input);
336 
337 			// save the data
338 			(*tensorData)[outputs[0]].reset(outputData.release());
339 
340 			// notify the caller
341 			cbTensorComputed(outputs[0]);
342 		};
343 		auto computeDualOperator = [](
344 			const float *input1, const TensorShape &input1Shape,
345 			const float *input2, const TensorShape &input2Shape,
346 			float *output, const TensorShape &outputShape,
347 			float(*fn)(float i1, float i2)
348 		) {
349 			// some values
350 			auto input1ShapeSize = Tensor::flatSize(input1Shape);
351 			const float *input1e = input1+input1ShapeSize;
352 
353 			// by type of inputs
354 			if (input1Shape==input2Shape) { // 2 streams of the same size produce another stream of the same size
355 				// input2 can only be dynamic here
356 				for (; input1<input1e; )
357 					*output++ = fn(*input1++, *input2++);
358 				return true;
359 			} else if (input2Shape.size()==1 && input2Shape[0]==1) { // operation with a constant from the model
360 				auto Const = input2[0]; // input2 can only be static here
361 				for (; input1<input1e; )
362 					*output++ = fn(*input1++, Const);
363 				return true;
364 			} else if (Tensor::isSubset(input1Shape, input2Shape)) { // operation with a smaller vector
365 				// input2 can be dynamic or static here
366 				auto input1e = input1+input1ShapeSize;
367 				auto input2b = input2;
368 				auto input2e = input2+Tensor::flatSize(input2Shape);
369 				for (; input1<input1e; input1++, output++) {
370 					*output = fn(*input1, *input2);
371 					if (++input2 >= input2e)
372 						input2 = input2b;
373 				}
374 				return true;
375 			} else {
376 				return false;
377 			}
378 		};
379 		auto applyActivationFunction = [](size_t size, float *data, PI::ActivationFunction activationFunction) {
380 			auto applyRELU = [](float &val) {
381 				if (val < 0)
382 					val = 0;
383 			};
384 			auto applyRELU_N1_TO_1 = [](float &val) {
385 				if (val < -1)
386 					val = -1;
387 				else if (val > 1)
388 					val = 1;
389 			};
390 			auto applyRELU6 = [](float &val) {
391 				if (val < 0)
392 					val = 0;
393 				else if (val > 6)
394 					val = 6;
395 			};
396 			auto applyTANH = [](float &val) {
397 				val = std::tanh(val);
398 			};
399 			auto applySIGN_BIT = [](float &val) {
400 				val = std::signbit(val) ? 1 : 0;
401 			};
402 			switch (activationFunction) {
403 			case PI::ActivationFunction_RELU:
404 				for (auto e = data+size; data<e; data++)
405 					applyRELU(*data);
406 				return;
407 			case PI::ActivationFunction_RELU_N1_TO_1:
408 				for (auto e = data+size; data<e; data++)
409 					applyRELU_N1_TO_1(*data);
410 				return;
411 			case PI::ActivationFunction_RELU6:
412 				for (auto e = data+size; data<e; data++)
413 					applyRELU6(*data);
414 				return;
415 			case PI::ActivationFunction_TANH:
416 				for (auto e = data+size; data<e; data++)
417 					applyTANH(*data);
418 				return;
419 			case PI::ActivationFunction_SIGN_BIT:
420 				for (auto e = data+size; data<e; data++)
421 					applySIGN_BIT(*data);
422 				return;
423 			case PI::ActivationFunction_NONE:
424 				return;
425 			}
426 		};
427 		auto doArgMxx = [&](float v0, std::function<bool(float,float)> cmp) {
428 			assert(inputs.size()==1);
429 			assert(outputs.size()==1);
430 			assert(opts); // need to have options present // TODO check the output_type operator option
431 
432 			auto inputShape = model->getTensorShape(inputs[0]);
433 			assert(Tensor::flatSize(model->getTensorShape(outputs[0])) == 1);
434 
435 			// create output data
436 			std::unique_ptr<float> outputData(new float[1]); // always return one number
437 
438 			// compute
439 			auto input = (*tensorData)[inputs[0]].get();
440 			int idx = -1;
441 			for (unsigned i = 0, ie = Tensor::flatSize(inputShape); i < ie; i++) {
442 				auto v = *input++;
443 				if (cmp(v, v0)) {
444 					idx = i;
445 					v0 = v;
446 				}
447 			}
448 			outputData.get()[0] = idx;
449 
450 			// save the data
451 			(*tensorData)[outputs[0]].reset(outputData.release());
452 
453 			// notify the caller
454 			cbTensorComputed(outputs[0]);
455 		};
456 
457 		// by operator kind
458 		auto operatorKind = model->getOperatorKind(oid);
459 		switch (operatorKind) {
460 		case PI::KindConv2D: {
461 			assert(inputs.size()==3 && outputs.size()==1);
462 			assert(opts); // need to have options present
463 			assert((*tensorData)[inputs[0]]); // need to have the input data present
464 
465 			// operator options required to run this operator
466 			int strideWidth=0, strideHeight=0;
467 			int dilationWidth=0, dilationHeight=0;
468 			PI::PaddingType paddingType;
469 			PI::ActivationFunction activationFunction = PI::ActivationFunction_NONE;
470 
471 			// parse the operator options supplied by the model into the above variables
472 			unsigned numParsed =
473 				OperatorOptions::GetOption1<PI::OperatorOption_STRIDE_W,            PI::OperatorOption_TypeInt,int>(*opts, &strideWidth)
474 				+ OperatorOptions::GetOption1<PI::OperatorOption_STRIDE_H,          PI::OperatorOption_TypeInt,int>(*opts, &strideHeight)
475 				+ OperatorOptions::GetOption1<PI::OperatorOption_DILATION_W_FACTOR, PI::OperatorOption_TypeInt,int>(*opts, &dilationWidth)
476 				+ OperatorOptions::GetOption1<PI::OperatorOption_DILATION_H_FACTOR, PI::OperatorOption_TypeInt,int>(*opts, &dilationHeight)
477 				+ OperatorOptions::GetOption1<PI::OperatorOption_PADDING, PI::OperatorOption_TypePaddingType,PI::PaddingType>(*opts, &paddingType)
478 				+ OperatorOptions::GetOption1<PI::OperatorOption_FUSED_ACTIVATION_FUNCTION,
479 					PI::OperatorOption_TypeActivationFunction,PI::ActivationFunction>(*opts, &activationFunction);
480 			assert(numParsed==6); // need to have 6 options
481 			assert(numParsed==opts->size()); // all options are parsed
482 			UNUSED(numParsed)
483 
484 			PRINT_OPTS("KindConv2D: have " << opts->size() << " options:"
485 			           " strideWidth=" << strideWidth <<
486 			           " strideHeight=" << strideHeight <<
487 			           " dilationWidth=" << dilationWidth <<
488 			           " strideHeight=" << strideHeight <<
489 			           " paddingType=" << paddingType <<
490 			           " activationFunction=" << activationFunction
491 			)
492 
493 			// tensors
494 			auto inputShape  = model->getTensorShape(inputs[0]);
495 			auto filterShape = model->getTensorShape(inputs[1]);
496 			auto outputShape = model->getTensorShape(outputs[0]);
497 			auto outputShapeSize = Tensor::flatSize(outputShape);
498 
499 			// create output data
500 			std::unique_ptr<float> outputData(new float[outputShapeSize]);
501 
502 			// compute
503 			NnOperators::Conv2D(
504 				inputShape, (*tensorData)[inputs[0]].get(), // input
505 				filterShape, model->getTensorDataF32(inputs[1]), // filter - assume that it is always a static tensor
506 				model->getTensorShape(inputs[2]), model->getTensorDataF32(inputs[2]), // bias - assume that it is always a static tensor
507 				outputShape, outputData.get(), // output
508 				translatePadding(strideWidth,  dilationWidth,  WIDTH,  inputShape, filterShape, outputShape),
509 				translatePadding(strideHeight, dilationHeight, HEIGHT, inputShape, filterShape, outputShape),
510 				strideWidth, strideHeight,
511 				dilationWidth, dilationHeight
512 			);
513 
514 			// activation function
515 			applyActivationFunction(outputShapeSize, outputData.get(), activationFunction);
516 
517 			// save the data
518 			(*tensorData)[outputs[0]].reset(outputData.release());
519 
520 			// notify the caller
521 			cbTensorComputed(outputs[0]);
522 
523 			break;
524 		} case PI::KindDepthwiseConv2D: {
525 			assert(inputs.size()==3 && outputs.size()==1);
526 			assert(opts); // need to have options present
527 			assert((*tensorData)[inputs[0]]); // need to have the input data present
528 
529 			// operator options required to run this operator
530 			int depthMultiplier=0;
531 			int strideWidth=0, strideHeight=0;
532 			int dilationWidth=0, dilationHeight=0;
533 			PI::PaddingType paddingType;
534 			PI::ActivationFunction activationFunction = PI::ActivationFunction_NONE;
535 
536 			// parse the operator options supplied by the model into the above variables
537 			unsigned numParsed =
538 				OperatorOptions::GetOption1<PI::OperatorOption_DEPTH_MULTIPLIER,    PI::OperatorOption_TypeInt,int>(*opts, &depthMultiplier)
539 				+ OperatorOptions::GetOption1<PI::OperatorOption_STRIDE_W,          PI::OperatorOption_TypeInt,int>(*opts, &strideWidth)
540 				+ OperatorOptions::GetOption1<PI::OperatorOption_STRIDE_H,          PI::OperatorOption_TypeInt,int>(*opts, &strideHeight)
541 				+ OperatorOptions::GetOption1<PI::OperatorOption_DILATION_W_FACTOR, PI::OperatorOption_TypeInt,int>(*opts, &dilationWidth)
542 				+ OperatorOptions::GetOption1<PI::OperatorOption_DILATION_H_FACTOR, PI::OperatorOption_TypeInt,int>(*opts, &dilationHeight)
543 				+ OperatorOptions::GetOption1<PI::OperatorOption_PADDING, PI::OperatorOption_TypePaddingType,PI::PaddingType>(*opts, &paddingType)
544 				+ OperatorOptions::GetOption1<PI::OperatorOption_FUSED_ACTIVATION_FUNCTION,
545 					PI::OperatorOption_TypeActivationFunction,PI::ActivationFunction>(*opts, &activationFunction);
546 			assert(numParsed==7); // need to have 7 options
547 			assert(numParsed==opts->size()); // all options are parsed
548 			UNUSED(numParsed)
549 
550 			PRINT_OPTS("KindDepthwiseConv2D: have " << opts->size() << " options:"
551 			           " depthMultiplier=" << depthMultiplier <<
552 			           " strideWidth=" << strideWidth <<
553 			           " strideHeight=" << strideHeight <<
554 			           " dilationWidth=" << dilationWidth <<
555 			           " strideHeight=" << strideHeight <<
556 			           " paddingType=" << paddingType <<
557 			           " activationFunction=" << activationFunction
558 			)
559 
560 			// tensors
561 			auto inputShape  = model->getTensorShape(inputs[0]);
562 			auto filterShape = model->getTensorShape(inputs[1]);
563 			auto outputShape = model->getTensorShape(outputs[0]);
564 			auto outputShapeSize = Tensor::flatSize(outputShape);
565 
566 			// create output data
567 			std::unique_ptr<float> outputData(new float[outputShapeSize]);
568 
569 			// compute
570 			NnOperators::DepthwiseConv2D(
571 				inputShape, (*tensorData)[inputs[0]].get(), // input
572 				filterShape, model->getTensorDataF32(inputs[1]), // filter
573 				model->getTensorShape(inputs[2]), model->getTensorDataF32(inputs[2]), // bias
574 				outputShape, outputData.get(), // output
575 				translatePadding(strideWidth,  dilationWidth,  WIDTH,  inputShape, filterShape, outputShape),
576 				translatePadding(strideHeight, dilationHeight, HEIGHT, inputShape, filterShape, outputShape),
577 				strideWidth, strideHeight,
578 				dilationWidth, dilationHeight,
579 				depthMultiplier
580 			);
581 
582 			// activation function
583 			applyActivationFunction(outputShapeSize, outputData.get(), activationFunction);
584 
585 			// save the data
586 			(*tensorData)[outputs[0]].reset(outputData.release());
587 
588 			// notify the caller
589 			cbTensorComputed(outputs[0]);
590 
591 			break;
592 		} case PI::KindPad: {
593 			// tensors
594 			auto inputDataShape = model->getTensorShape(inputs[0]);
595 			auto inputPaddingsShape = model->getTensorShape(inputs[1]);
596 			auto outputShape = model->getTensorShape(outputs[0]);
597 
598 			// check that shapes are consistent
599 			assert(inputDataShape.size() <= 4); // TfLite has max=4 hardcoded in PadParams
600 			assert(inputPaddingsShape.size()==2 && inputPaddingsShape[0]==inputDataShape.size() && inputPaddingsShape[1]==2);
601 
602 			// inputs
603 			assert(model->getTensorType(inputs[1]) == PI::DataType_Int32);
604 			auto paddings = static_cast<const std::array<int32_t,2>*>(model->getTensorData(inputs[1]));
605 
606 			// create output data
607 			std::unique_ptr<float> outputData(new float[Tensor::flatSize(outputShape)]);
608 
609 			// compute
610 			NnOperators::Pad(
611 				paddings,
612 				inputDataShape, (*tensorData)[inputs[0]].get(), // input
613 				outputShape, outputData.get() // output
614 			);
615 
616 			// save the data
617 			(*tensorData)[outputs[0]].reset(outputData.release());
618 
619 			// notify the caller
620 			cbTensorComputed(outputs[0]);
621 
622 			break;
623 		} case PI::KindFullyConnected: {
624 			assert(inputs.size()==3 && outputs.size()==1);
625 			assert(opts); // need to have options present
626 			assert((*tensorData)[inputs[0]]); // need to have the input data present
627 
628 			// operator options required to run this operator
629 			bool keepNumDims = false;
630 			int  weightsFormat = 0;
631 			PI::ActivationFunction activationFunction = PI::ActivationFunction_NONE;
632 
633 			// parse the operator options supplied by the model into the above variables
634 			unsigned numParsed =
635 				OperatorOptions::GetOption1<PI::OperatorOption_KEEP_NUM_DIMS,    PI::OperatorOption_TypeBool,bool>(*opts, &keepNumDims)
636 				+ OperatorOptions::GetOption1<PI::OperatorOption_WEIGHTS_FORMAT, PI::OperatorOption_TypeInt, int> (*opts, &weightsFormat)
637 				+ OperatorOptions::GetOption1<PI::OperatorOption_FUSED_ACTIVATION_FUNCTION,
638 					PI::OperatorOption_TypeActivationFunction,PI::ActivationFunction>(*opts, &activationFunction);
639 			assert(numParsed==3); // need to have 3 options
640 			assert(numParsed==opts->size()); // all options are parsed
641 			UNUSED(numParsed)
642 
643 			if (weightsFormat != 0) {
644 				cbWarningMessage(STR("Computation didn't succeed: operator #" << (oid+1) << ": " << operatorKind << " option weights_format isn't zero"));
645 				return false; // failed to compute the model to the end
646 			}
647 
648 			PRINT_OPTS("FullyConnected: have " << opts->size() << " options:"
649 			           " keepNumDims=" << keepNumDims <<
650 			           " weightsFormat=" << weightsFormat <<
651 			           " activationFunction=" << activationFunction
652 			)
653 
654 			// tensors
655 			auto inputShape  = model->getTensorShape(inputs[0]);
656 			auto filterShape = model->getTensorShape(inputs[1]);
657 			auto outputShape = model->getTensorShape(outputs[0]);
658 			auto outputShapeSize = Tensor::flatSize(outputShape);
659 
660 			// create output data
661 			std::unique_ptr<float> outputData(new float[outputShapeSize]);
662 
663 			// compute
664 			NnOperators::FullyConnected(
665 				inputShape, (*tensorData)[inputs[0]].get(), // input
666 				filterShape, model->getTensorDataF32(inputs[1]), // filter
667 				model->getTensorShape(inputs[2]), model->getTensorDataF32(inputs[2]), // bias
668 				outputShape, outputData.get() // output
669 			);
670 
671 			// activation function
672 			applyActivationFunction(outputShapeSize, outputData.get(), activationFunction);
673 
674 			// save the data
675 			(*tensorData)[outputs[0]].reset(outputData.release());
676 
677 			// notify the caller
678 			cbTensorComputed(outputs[0]);
679 
680 			break;
681 		} case PI::KindLocalResponseNormalization: {
682 			assert(inputs.size()==1 && outputs.size()==1);
683 			assert(opts); // need to have options present
684 			assert((*tensorData)[inputs[0]]); // need to have the input data present
685 
686 			// operator options required to run this operator
687 			int radius = 0;
688 			float alpha = 0, beta = 0, bias = 0;
689 
690 			// parse the operator options supplied by the model into the above variables
691 			unsigned numParsed =
692 				OperatorOptions::GetOption1<PI::OperatorOption_RADIUS,    PI::OperatorOption_TypeInt,int>(*opts, &radius)
693 				+ OperatorOptions::GetOption1<PI::OperatorOption_ALPHA,   PI::OperatorOption_TypeFloat,float> (*opts, &alpha)
694 				+ OperatorOptions::GetOption1<PI::OperatorOption_BETA,    PI::OperatorOption_TypeFloat,float> (*opts, &beta)
695 				+ OperatorOptions::GetOption1<PI::OperatorOption_BIAS,    PI::OperatorOption_TypeFloat,float> (*opts, &bias);
696 			assert(numParsed==4); // need to have 4 options
697 			assert(numParsed==opts->size()); // all options are parsed
698 			UNUSED(numParsed)
699 
700 			PRINT_OPTS("LocalResponseNormalization: have " << opts->size() << " options:"
701 			           " radius=" << radius <<
702 			           " alpha=" << alpha <<
703 			           " beta=" << beta <<
704 			           " bias=" << bias
705 			)
706 
707 			// create output data
708 			std::unique_ptr<float> outputData(new float[Tensor::flatSize(model->getTensorShape(outputs[0]))]);
709 
710 			// compute
711 			NnOperators::LocalResponseNormalization(
712 				model->getTensorShape(inputs[0]), (*tensorData)[inputs[0]].get(), // input
713 				model->getTensorShape(outputs[0]), outputData.get(), // output
714 				radius, alpha, beta, bias
715 			);
716 
717 			// save the data
718 			(*tensorData)[outputs[0]].reset(outputData.release());
719 
720 			// notify the caller
721 			cbTensorComputed(outputs[0]);
722 
723 			break;
724 		} case PI::KindMaxPool:
725 		  case PI::KindAveragePool: {
726 			assert(inputs.size()==1 && outputs.size()==1);
727 			assert(opts); // need to have options present
728 			assert((*tensorData)[inputs[0]]); // need to have the input data present
729 
730 			// operator options required to run this operator
731 			int strideWidth=0, strideHeight=0;
732 			int filterWidth=0, filterHeight=0;
733 			PI::PaddingType paddingType;
734 			PI::ActivationFunction activationFunction = PI::ActivationFunction_NONE;
735 
736 			// parse the operator options supplied by the model into the above variables
737 			unsigned numParsed =
738 				OperatorOptions::GetOption1<PI::OperatorOption_STRIDE_W,            PI::OperatorOption_TypeInt,int>(*opts, &strideWidth)
739 				+ OperatorOptions::GetOption1<PI::OperatorOption_STRIDE_H,          PI::OperatorOption_TypeInt,int>(*opts, &strideHeight)
740 				+ OperatorOptions::GetOption1<PI::OperatorOption_FILTER_WIDTH,      PI::OperatorOption_TypeInt,int>(*opts, &filterWidth)
741 				+ OperatorOptions::GetOption1<PI::OperatorOption_FILTER_HEIGHT,     PI::OperatorOption_TypeInt,int>(*opts, &filterHeight)
742 				+ OperatorOptions::GetOption1<PI::OperatorOption_PADDING, PI::OperatorOption_TypePaddingType,PI::PaddingType>(*opts, &paddingType)
743 				+ OperatorOptions::GetOption1<PI::OperatorOption_FUSED_ACTIVATION_FUNCTION,
744 					PI::OperatorOption_TypeActivationFunction,PI::ActivationFunction>(*opts, &activationFunction);
745 			assert(numParsed==6); // need to have 6 options
746 			assert(numParsed==opts->size()); // all options are parsed
747 			UNUSED(numParsed)
748 
749 			PRINT_OPTS(operatorKind << ": have " << opts->size() << " options:"
750 			           " strideHeight=" << strideHeight <<
751 			           " strideHeight=" << strideHeight <<
752 			           " filterWidth=" << filterWidth <<
753 			           " filterHeight=" << filterHeight <<
754 			           " paddingType=" << paddingType <<
755 			           " activationFunction=" << activationFunction
756 			)
757 
758 			// tensors
759 			auto inputShape  = model->getTensorShape(inputs[0]);
760 			TensorShape filterShape = {0,(unsigned)filterHeight,(unsigned)filterWidth,0};
761 			auto outputShape = model->getTensorShape(outputs[0]);
762 			auto outputShapeSize = Tensor::flatSize(outputShape);
763 
764 			// create output data
765 			std::unique_ptr<float> outputData(new float[outputShapeSize]);
766 
767 			// compute
768 			(operatorKind==PI::KindMaxPool ? NnOperators::MaxPool : NnOperators::AveragePool)(
769 				inputShape, (*tensorData)[inputs[0]].get(), // input
770 				outputShape, outputData.get(), // output
771 				translatePadding(strideWidth,  1/*dilationWidth*/,  WIDTH,  inputShape, filterShape, outputShape),
772 				translatePadding(strideHeight, 1/*dilationHeight*/, HEIGHT, inputShape, filterShape, outputShape),
773 				strideWidth, strideHeight,
774 				filterWidth, filterHeight
775 			);
776 
777 			// activation function
778 			applyActivationFunction(outputShapeSize, outputData.get(), activationFunction);
779 
780 			// save the data
781 			(*tensorData)[outputs[0]].reset(outputData.release());
782 
783 			// notify the caller
784 			cbTensorComputed(outputs[0]);
785 
786 			break;
787 		} case PI::KindTanh: {
788 			assert(inputs.size()==1 && outputs.size()==1);
789 			assert(!opts || opts->empty()); // tanh has no options
790 			assert((*tensorData)[inputs[0]]); // need to have the input data present
791 
792 			PRINT_OPTS("Tanh: activation function")
793 
794 			// tensors
795 			auto inputShape = model->getTensorShape(inputs[0]);
796 			auto outputShape = model->getTensorShape(outputs[0]);
797 			auto inputShapeSize = Tensor::flatSize(inputShape);
798 			assert(inputShape==outputShape);
799 			UNUSED(outputShape)
800 
801 			// create output data
802 			std::unique_ptr<float> outputData(new float[inputShapeSize]);
803 
804 			// compute
805 			auto input = (*tensorData)[inputs[0]].get();
806 			auto output = outputData.get();
807 			for (auto inpute = input+inputShapeSize; input<inpute; input++, output++)
808 				*output = std::tanh(*input);
809 
810 			// save the data
811 			(*tensorData)[outputs[0]].reset(outputData.release());
812 
813 			// notify the caller
814 			cbTensorComputed(outputs[0]);
815 
816 			break;
817 		} case PI::KindLogistic: {
818 			assert(inputs.size()==1 && outputs.size()==1);
819 			assert(!opts || opts->empty()); // tanh has no options
820 			assert((*tensorData)[inputs[0]]); // need to have the input data present
821 
822 			PRINT_OPTS("Logistic: activation function")
823 
824 			// tensors
825 			auto inputShape = model->getTensorShape(inputs[0]);
826 			auto outputShape = model->getTensorShape(outputs[0]);
827 			auto inputShapeSize = Tensor::flatSize(inputShape);
828 			assert(inputShape==outputShape);
829 			UNUSED(outputShape)
830 
831 			// create output data
832 			std::unique_ptr<float> outputData(new float[inputShapeSize]);
833 
834 			// compute
835 			auto input = (*tensorData)[inputs[0]].get();
836 			auto output = outputData.get();
837 			for (auto inpute = input+inputShapeSize; input<inpute; input++, output++)
838 				*output = 1./(1. + std::exp(*input));
839 
840 			// save the data
841 			(*tensorData)[outputs[0]].reset(outputData.release());
842 
843 			// notify the caller
844 			cbTensorComputed(outputs[0]);
845 
846 			break;
		} case PI::KindReshape: {
			// Reshape only reinterprets the shape: flat sizes must match (asserted below),
			// so the underlying data array can be shared with the input without copying.
			assert((inputs.size()==1 || inputs.size()==2) && outputs.size()==1); // XXX not sure why the 'new_shape' is in both input[1] and 'new_shape' option
			assert(opts); // need to have options present, but we ignore them for now ...
			assert((*tensorData)[inputs[0]]); // need to have the input data present
			assert(Tensor::flatSize(model->getTensorShape(outputs[0])) == Tensor::flatSize(model->getTensorShape(inputs[0])));

			PRINT_OPTS("Reshape: have " << opts->size() << " options, but we ignored them for now")

			// just share the data array (both tensor ids now reference the same buffer)
			(*tensorData)[outputs[0]] = (*tensorData)[inputs[0]];

			// notify the caller
			cbTensorComputed(outputs[0]);

			break;
862 		} case PI::KindHardSwish: {
863 			assert(inputs.size()==1 && outputs.size()==1);
864 			assert(!opts || opts->empty()); // h-swish has no options
865 			assert((*tensorData)[inputs[0]]); // need to have the input data present
866 
867 			PRINT_OPTS("HardSwish: activation function")
868 
869 			// tensors
870 			auto inputShape = model->getTensorShape(inputs[0]);
871 			auto outputShape = model->getTensorShape(outputs[0]);
872 			auto inputShapeSize = Tensor::flatSize(inputShape);
873 			assert(inputShape==outputShape);
874 			UNUSED(outputShape)
875 
876 			// create output data
877 			std::unique_ptr<float> outputData(new float[inputShapeSize]);
878 
879 			// compute
880 			auto input = (*tensorData)[inputs[0]].get();
881 			auto output = outputData.get();
882 			auto hardSwish = [](float x) {
883 				// defined in the "Searching for MobileNet3" paper (https://arxiv.org/pdf/1905.02244.pdf)
884 				// h-swish(x) = x*(ReLU6(x+3)/6)
885 				if (x>=3)
886 					return x;
887 				else if (x<=-3)
888 					return (float)0;
889 				else
890 					return x*(x+3)/6;
891 			};
892 			for (auto inpute = input+inputShapeSize; input<inpute; input++, output++)
893 				*output = hardSwish(*input);
894 
895 			// save the data
896 			(*tensorData)[outputs[0]].reset(outputData.release());
897 
898 			// notify the caller
899 			cbTensorComputed(outputs[0]);
900 
901 			break;
		} case PI::KindRSqrt: {
			// element-wise reciprocal square root: 1/sqrt(x), computed in double and narrowed to float on return
			computeSingleOperator([](float f) -> float {return 1./std::sqrt(f);});
			break;
		} case PI::KindAdd:
		  case PI::KindMul: {
			// Element-wise Add/Mul of two inputs with an optional fused activation function.
			// input[1] may be a static (model-supplied) tensor — see getTensorDataDynamicOrStatic below.
			assert(inputs.size()==2 && outputs.size()==1);
			assert(opts); // need to have options present
			assert((*tensorData)[inputs[0]]); // need to have the input data present
			assert(model->getTensorShape(inputs[0]) == model->getTensorShape(outputs[0])); // produces the same shape as consumes TODO should be in the model validation stage

			// operator options required to run this operator
			PI::ActivationFunction activationFunction = PI::ActivationFunction_NONE;

			unsigned numParsed =
				OperatorOptions::GetOption1<PI::OperatorOption_FUSED_ACTIVATION_FUNCTION,
					PI::OperatorOption_TypeActivationFunction,PI::ActivationFunction>(*opts, &activationFunction);
			assert(numParsed==1); // need to have 1 options
			assert(numParsed==opts->size()); // all options are parsed
			UNUSED(numParsed)

			PRINT_OPTS(operatorKind << ": have " << opts->size() << " options:"
			           " activationFunction=" << activationFunction)

			// tensors: output buffer is sized after input[0], which is asserted above to match the output shape
			auto input1Shape = model->getTensorShape(inputs[0]);
			auto input2Shape = model->getTensorShape(inputs[1]);
			auto outputShape = model->getTensorShape(outputs[0]);
			auto input1ShapeSize = Tensor::flatSize(input1Shape);

			// create output data
			std::unique_ptr<float> outputData(new float[input1ShapeSize]);

			// compute: computeDualOperator returns false when this input shape combination
			// (e.g. a broadcast pattern) isn't supported yet — reported below as a warning
			bool succ = operatorKind==PI::KindAdd ?
				computeDualOperator( // KindAdd
					(*tensorData)[inputs[0]].get(), model->getTensorShape(inputs[0]),
					getTensorDataDynamicOrStatic(inputs[1]), model->getTensorShape(inputs[1]),
					outputData.get(), outputShape,
					[](float f1, float f2) {return f1+f2;})
				:
				computeDualOperator( // KindMul
					(*tensorData)[inputs[0]].get(), model->getTensorShape(inputs[0]),
					getTensorDataDynamicOrStatic(inputs[1]), model->getTensorShape(inputs[1]),
					outputData.get(), outputShape,
					[](float f1, float f2) {return f1*f2;});
			if (!succ) {
				cbWarningMessage(STR("Computation didn't succeed: operator #" << (oid+1) <<
				                     ": " << operatorKind << " isn't yet implemented for shapes " << input1Shape << " and " << input2Shape));
				return false; // failed to compute the model to the end
			}

			// apply the fused activation function in place
			applyActivationFunction(input1ShapeSize, outputData.get(), activationFunction);

			// save the data
			(*tensorData)[outputs[0]].reset(outputData.release());

			// notify the caller
			cbTensorComputed(outputs[0]);

			break;
963 		} case PI::KindSoftmax: {
964 			assert(inputs.size()==1 && outputs.size()==1);
965 			assert(opts); // need to have options present
966 			assert((*tensorData)[inputs[0]]); // need to have the input data present
967 
968 			// operator options required to run this operator
969 			float beta=0;
970 
971 			unsigned numParsed =
972 				OperatorOptions::GetOption1<PI::OperatorOption_BETA,    PI::OperatorOption_TypeFloat,float>(*opts, &beta);
973 			assert(numParsed==1); // need to have 1 options
974 			assert(numParsed==opts->size()); // all options are parsed
975 			UNUSED(numParsed)
976 
977 			PRINT_OPTS("Softmax: have " << opts->size() << " options:"
978 			           " beta=" <<  beta)
979 
980 			// create output data
981 			std::unique_ptr<float> outputData(new float[Tensor::flatSize(model->getTensorShape(outputs[0]))]);
982 
983 			// compute
984 			NnOperators::Softmax(
985 				model->getTensorShape(inputs[0]), (*tensorData)[inputs[0]].get(), // input
986 				model->getTensorShape(outputs[0]), outputData.get(), // output
987 				beta
988 			);
989 
990 			// save the data
991 			(*tensorData)[outputs[0]].reset(outputData.release());
992 
993 			// notify the caller
994 			cbTensorComputed(outputs[0]);
995 
996 			break;
997 		} case PI::KindConcatenation: {
998 			assert(outputs.size()==1);
999 			assert(opts); // need to have options present
1000 
1001 			// operator options required to run this operator
1002 			int axis = 0;
1003 			PI::ActivationFunction activationFunction = PI::ActivationFunction_NONE;
1004 
1005 			// parse the operator options supplied by the model into the above variables
1006 			unsigned numParsed =
1007 				OperatorOptions::GetOption1<PI::OperatorOption_AXIS, PI::OperatorOption_TypeInt,int>(*opts, &axis)
1008 				+ OperatorOptions::GetOption1<PI::OperatorOption_FUSED_ACTIVATION_FUNCTION,
1009 					PI::OperatorOption_TypeActivationFunction,PI::ActivationFunction>(*opts, &activationFunction);
1010 			assert(numParsed==2); // need to have 2 options
1011 			assert(numParsed==opts->size()); // all options are parsed
1012 			UNUSED(numParsed)
1013 
1014 			// input tensors
1015 			std::shared_ptr<const float> inputTensorData[inputs.size()];
1016 			for (unsigned o = 0, oe = sizeof(inputTensorData)/sizeof(inputTensorData[0]); o < oe; o++)
1017 				inputTensorData[o] = (*tensorData)[inputs[o]];
1018 
1019 			// input buffers and sizes array
1020 			std::tuple<const float*,unsigned> ins[inputs.size()];
1021 			for (unsigned i = 0, ie = inputs.size(); i<ie; i++) {
1022 				auto inputTensorId = inputs[i];
1023 				auto inputShape = model->getTensorShape(inputTensorId);
1024 				ins[i] = {(*tensorData)[inputTensorId].get(), Tensor::flatSize(Tensor::getLastDims(inputShape, inputShape.size()-axis))};
1025 			}
1026 
1027 			// create output data
1028 			auto outputShapeSize = Tensor::flatSize(model->getTensorShape(outputs[0]));
1029 			std::unique_ptr<float> outputData(new float[outputShapeSize]);
1030 
1031 			// compute
1032 			CopyTensorSlices<float,const float>(model, outputs[0], inputs, outputData.get(), inputTensorData, axis,
1033 				[](float* &one, const float* &split, unsigned num) {
1034 					std::memcpy(one, split, num*sizeof(float));
1035 					one += num;
1036 					split += num;
1037 				}
1038 			);
1039 
1040 			// activation function
1041 			applyActivationFunction(outputShapeSize, outputData.get(), activationFunction);
1042 
1043 			// save the data
1044 			(*tensorData)[outputs[0]].reset(outputData.release());
1045 
1046 			// notify the caller
1047 			cbTensorComputed(outputs[0]);
1048 
1049 			break;
1050 		} case PI::KindSplit: {
1051 			assert(inputs.size()==2);
1052 			assert(opts); // need to have options present
1053 
1054 			// operator options required to run this operator
1055 			int num_splits = 0;
1056 
1057 			// parse the operator options supplied by the model into the above variables
1058 			unsigned numParsed = OperatorOptions::GetOption1<PI::OperatorOption_NUM_SPLITS, PI::OperatorOption_TypeInt,int>(*opts, &num_splits);
1059 			PRINT("numParsed=" << numParsed)
1060 			assert(numParsed==1); // need to have 1 option
1061 			assert(numParsed==opts->size()); // all options are parsed
1062 			UNUSED(numParsed)
1063 
1064 			// checks
1065 			assert(num_splits == outputs.size()); // runtime check should be in the model verifier
1066 
1067 			// argument1 has the axis index
1068 			assert(model->getTensorShape(inputs[0]) == TensorShape({1}));
1069 			const int axis = model->getTensorDataF32(inputs[0])[0];
1070 
1071 			// create output data
1072 			std::shared_ptr<float> outputTensorData[outputs.size()];
1073 			for (unsigned o = 0, oe = sizeof(outputTensorData)/sizeof(outputTensorData[0]); o < oe; o++)
1074 				outputTensorData[o].reset(new float[Tensor::flatSize(model->getTensorShape(outputs[o]))]);
1075 
1076 			// compute
1077 			CopyTensorSlices<const float,float>(model, inputs[1], outputs, (*tensorData)[inputs[1]].get(), outputTensorData, axis,
1078 				[](const float* &one, float* &split, unsigned num) {
1079 					std::memcpy(split, one, num*sizeof(float));
1080 					one += num;
1081 					split += num;
1082 				}
1083 			);
1084 
1085 			// save the data and notify the caller
1086 			for (unsigned o = 0, oe = outputs.size(); o < oe; o++) {
1087 				(*tensorData)[outputs[o]] = outputTensorData[o];
1088 				cbTensorComputed(outputs[o]);
1089 			}
1090 
1091 			break;
		} case PI::KindMean: {
			// Mean reduction: input[1] is a static int32 tensor passed through to NnOperators::Mean
			// (presumably the list of axes to reduce over — TODO confirm against NnOperators::Mean)
			assert(inputs.size()==2);
			assert(outputs.size()==1);
			assert(model->getTensorType(inputs[1]) == PI::DataType_Int32);
			assert(opts); // need to have options present
			// NOTE(review): options are asserted present but never parsed here — confirm that
			// Mean's options (e.g. keep_dims, if any) really need no handling

			// tensors
			auto outputShape = model->getTensorShape(outputs[0]);
			auto outputShapeSize = Tensor::flatSize(outputShape);

			// create output data
			std::unique_ptr<float> outputData(new float[outputShapeSize]);

			// compute
			NnOperators::Mean(
				model->getTensorShape(inputs[0]), (*tensorData)[inputs[0]].get(), // input
				outputShape, outputData.get(), // output
				static_cast<const int32_t*>(model->getTensorData(inputs[1])), Tensor::flatSize(model->getTensorShape(inputs[1]))
			);

			// save the data
			(*tensorData)[outputs[0]].reset(outputData.release());

			// notify the caller
			cbTensorComputed(outputs[0]);

			break;
		} case PI::KindArgMax: {
			// ArgMax: seed the search with the lowest float so any real value wins the first comparison
			// NOTE(review): std::numeric_limits needs <limits>, not visible in this file's includes — confirm it arrives transitively
			doArgMxx(std::numeric_limits<float>::lowest(), [](float f1,float f2) {return f1>f2;});
			break;
		} case PI::KindArgMin: {
			// ArgMin: same search with the comparison inverted, seeded with the largest float
			doArgMxx(std::numeric_limits<float>::max(), [](float f1,float f2) {return f1<f2;});
			break;
		} case PI::KindSquaredDifference: {
			// Element-wise (a-b)^2 of two inputs; input[1] may be a static (model-supplied) tensor
			assert(inputs.size()==2 && outputs.size()==1);
			assert(opts); // need to have options present
			assert((*tensorData)[inputs[0]]); // need to have the input data present
			assert(model->getTensorShape(inputs[0]) == model->getTensorShape(outputs[0])); // produces the same shape as consumes TODO should be in the model validation stage

			assert(opts->size() == 0); // all options are parsed

			PRINT_OPTS(operatorKind << ": have " << opts->size() << " options")

			// tensors: output buffer is sized after input[0], which matches the output shape per the assert above
			auto input1Shape = model->getTensorShape(inputs[0]);
			auto input2Shape = model->getTensorShape(inputs[1]);
			auto outputShape = model->getTensorShape(outputs[0]);
			auto input1ShapeSize = Tensor::flatSize(input1Shape);

			// create output data
			std::unique_ptr<float> outputData(new float[input1ShapeSize]);

			// compute: computeDualOperator returns false for unsupported shape combinations
			if (!computeDualOperator(
					(*tensorData)[inputs[0]].get(), model->getTensorShape(inputs[0]),
					getTensorDataDynamicOrStatic(inputs[1]), model->getTensorShape(inputs[1]),
					outputData.get(), outputShape,
					[](float f1, float f2) {return (f1-f2)*(f1-f2);}))
			{
				cbWarningMessage(STR("Computation didn't succeed: operator #" << (oid+1) <<
				                     ": " << operatorKind << " isn't yet implemented for shapes " << input1Shape << " and " << input2Shape));
				return false; // failed to compute the model to the end
			}

			// save the data
			(*tensorData)[outputs[0]].reset(outputData.release());

			// notify the caller
			cbTensorComputed(outputs[0]);

			break;
		} case PI::KindResizeBilinear: {
			// Bilinear image resize: target size is implied by the output tensor's shape
			assert(inputs.size()==1 && outputs.size()==1);
			assert(opts); // need to have options present
			assert((*tensorData)[inputs[0]]); // need to have the input data present

			// operator options required to run this operator
			bool alignCorners = false;

			// NOTE(review): the option is read into a bool but tagged OperatorOption_TypeFloat —
			// looks like it should be a bool-type tag; confirm against the OperatorOption definitions
			unsigned numParsed =
				OperatorOptions::GetOption1<PI::OperatorOption_ALIGN_CORNERS, PI::OperatorOption_TypeFloat,bool>(*opts, &alignCorners);
			assert(numParsed==1); // need to have 1 options
			assert(numParsed==opts->size()); // all options are parsed
			UNUSED(numParsed)

			PRINT_OPTS("ResizeBilinear: have " << opts->size() << " options:"
			           " alignCorners=" << alignCorners)

			// create output data
			std::unique_ptr<float> outputData(new float[Tensor::flatSize(model->getTensorShape(outputs[0]))]);

			// compute
			NnOperators::ResizeBilinear(
				model->getTensorShape(inputs[0]), (*tensorData)[inputs[0]].get(), // input
				model->getTensorShape(outputs[0]), outputData.get(), // output
				alignCorners
			);

			// save the data
			(*tensorData)[outputs[0]].reset(outputData.release())\u003b

			// notify the caller
			cbTensorComputed(outputs[0]);

			break;
1197 		} case PI::KindResizeNearestNeighbor: {
1198 			assert(inputs.size()==1 && outputs.size()==1);
1199 			assert(opts); // need to have options present
1200 			assert((*tensorData)[inputs[0]]); // need to have the input data present
1201 
1202 			// operator options required to run this operator
1203 			bool alignCorners = false;
1204 
1205 			unsigned numParsed =
1206 				OperatorOptions::GetOption1<PI::OperatorOption_ALIGN_CORNERS, PI::OperatorOption_TypeFloat,bool>(*opts, &alignCorners);
1207 			assert(numParsed==1); // need to have 1 options
1208 			assert(numParsed==opts->size()); // all options are parsed
1209 			UNUSED(numParsed)
1210 
1211 			PRINT_OPTS("ResizeBilinear: have " << opts->size() << " options:"
1212 			           " alignCorners=" << alignCorners)
1213 
1214 			// create output data
1215 			std::unique_ptr<float> outputData(new float[Tensor::flatSize(model->getTensorShape(outputs[0]))]);
1216 
1217 			// compute
1218 			NnOperators::ResizeNearestNeighbor(
1219 				model->getTensorShape(inputs[0]), (*tensorData)[inputs[0]].get(), // input
1220 				model->getTensorShape(outputs[0]), outputData.get(), // output
1221 				alignCorners
1222 			);
1223 
1224 			// save the data
1225 			(*tensorData)[outputs[0]].reset(outputData.release());
1226 
1227 			// notify the caller
1228 			cbTensorComputed(outputs[0]);
1229 
1230 			break;
		} default: {
			// unsupported operator kind: report it via the warning callback and abort the computation
			cbWarningMessage(STR("Computation didn't succeed: operator #" << (oid+1) << ": " << operatorKind << " isn't yet implemented"));
			return false; // failed to compute the model to the end
		}}
1235 	}
1236 
1237 	return true; // successfully computed the model to the end
1238 }
1239 
1240 }
1241