// Copyright (C) 2020 by Yuri Victorovich. All rights reserved.

//
// tflite-reference-implementation.cpp contains portions of the Apache 2.0 licensed code from the TensorFlow source tree
//

#include <array>
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <vector>
#include <cmath>

#include <assert.h>

namespace tflite {

/// some common macros used in the code

#define TFLITE_DCHECK(condition) assert(condition) // originally: (condition) ? (void)0 : TFLITE_ASSERT_FALSE
#define TFLITE_DCHECK_EQ(a, b) assert((a) == (b))
#define TFLITE_DCHECK_LE(a, b) assert((a) <= (b))
#define TFLITE_DCHECK_GE(a, b) assert((a) >= (b))
#define TFLITE_CHECK_EQ(x, y) (((x) == (y)) ? (void)0 : TFLITE_ABORT)
#define TFLITE_CHECK_LE(x, y) (((x) <= (y)) ? (void)0 : TFLITE_ABORT)
#define TFLITE_ABORT abort()
/// common types used in this code

typedef int8_t   int8;
typedef uint8_t  uint8;
typedef int16_t  int16;
typedef uint16_t uint16;
typedef int32_t  int32;
typedef uint32_t uint32;

enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu };
enum class PaddingType : uint8 { kNone, kSame, kValid };

struct PaddingValues {
  int16 width;
  int16 height;
  // offset is used for calculating "remaining" padding, for example, `width`
  // is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is
  // 1 + 1 = 2.
  int16 width_offset; // unused
  // Same as width_offset except it's over the height dimension.
  int16 height_offset; // unused

	PaddingValues()
	: width(0)
	, height(0)
	, width_offset(0)
	, height_offset(0)
	{ }
};
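
// A minimal sketch (not part of the original TFLite API): per the comment above,
// `width`/`height` hold the leading padding and `*_offset` the extra trailing
// padding, e.g. width == 1 and width_offset == 1 give padding_left == 1 and
// padding_right == 1 + 1 == 2. These hypothetical helpers only restate that rule.
static inline int PaddingLeftOf(const PaddingValues &p)  { return p.width; }
static inline int PaddingRightOf(const PaddingValues &p) { return p.width + p.width_offset; }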

// from: tensorflow/lite/kernels/internal/types.h
class RuntimeShape : public std::vector<unsigned> {
public:
	RuntimeShape() { }
	RuntimeShape(const std::vector<unsigned> &v) : std::vector<unsigned>(v) { }
	RuntimeShape(unsigned d1) {push_back(d1);}
	RuntimeShape(unsigned d1, unsigned d2) {push_back(d1); push_back(d2);}
	RuntimeShape(unsigned d1, unsigned d2, unsigned d3) {push_back(d1); push_back(d2); push_back(d3);}
	RuntimeShape(unsigned d1, unsigned d2, unsigned d3, unsigned d4) {push_back(d1); push_back(d2); push_back(d3); push_back(d4);}
	void Resize(unsigned sz_) {resize(sz_);}
	void SetDim(unsigned dim, unsigned val) {(*this)[dim] = val;}
	unsigned Dims(unsigned n) const {return (*this)[n];}
	int DimensionsCount() const {return size();}
	int FlatSize() const {
		int sz = 1;
		for (auto i = begin(); i != end(); i++)
			sz *= *i;
		return sz;
	}
	inline const unsigned* DimsData() const { return &(*this)[0]; }
	inline static RuntimeShape ExtendedShape(int new_shape_size, const RuntimeShape& shape) {
		return RuntimeShape(new_shape_size, shape, 1);
	}
	RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value) {
		assert(new_shape_size >= shape.DimensionsCount());
		resize(new_shape_size);
		const int size_increase = new_shape_size - shape.DimensionsCount();
		int i = 0;
		for (; i < size_increase; ++i)
			*(begin()+i) = pad_value;
		for (auto n : shape)
			*(begin()+(i++)) = n;
	}
};
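
// Usage sketch (illustrative only; RuntimeShapeExample is a hypothetical name):
// RuntimeShape is a thin std::vector<unsigned> wrapper, so an NHWC shape of
// 1x8x8x3 has FlatSize() == 192, and ExtendedShape(4, {8, 3}) left-pads the
// shape with 1s to {1, 1, 8, 3}.
static inline void RuntimeShapeExample() {
	RuntimeShape nhwc(1, 8, 8, 3);
	assert(nhwc.FlatSize() == 1 * 8 * 8 * 3);
	RuntimeShape extended = RuntimeShape::ExtendedShape(4, RuntimeShape(8, 3));
	assert(extended.DimensionsCount() == 4);
	assert(extended.Dims(0) == 1u && extended.Dims(1) == 1u);
	assert(extended.Dims(2) == 8u && extended.Dims(3) == 3u);
}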

static unsigned MatchingDim(const RuntimeShape &shape1, unsigned dim1, const RuntimeShape &shape2, unsigned dim2) {
	assert(shape1[dim1] == shape2[dim2]);
	return shape1[dim1];
}

inline int MatchingFlatSize(const RuntimeShape& shape,
                            const RuntimeShape& check_shape_0) {
  TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
  const int dims_count = shape.DimensionsCount();
  for (int i = 0; i < dims_count; ++i) {
    TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
  }
  return shape.FlatSize();
}

inline int MatchingFlatSize(const RuntimeShape& shape,
                            const RuntimeShape& check_shape_0,
                            const RuntimeShape& check_shape_1) {
  TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
  const int dims_count = shape.DimensionsCount();
  for (int i = 0; i < dims_count; ++i) {
    TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
  }
  return MatchingFlatSize(shape, check_shape_1);
}

inline int FlatSizeSkipDim(const RuntimeShape& shape, int skip_dim) {
  const int dims_count = shape.DimensionsCount();
  TFLITE_DCHECK(skip_dim >= 0 && skip_dim < dims_count);
  const auto* dims_data = shape.DimsData();
  int flat_size = 1;
  for (int i = 0; i < dims_count; ++i) {
    flat_size *= (i == skip_dim) ? 1 : dims_data[i];
  }
  return flat_size;
}

inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim,
                                   const RuntimeShape& check_shape_0) {
  const int dims_count = shape.DimensionsCount();
  for (int i = 0; i < dims_count; ++i) {
    if (i != skip_dim) {
      TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
    }
  }
  return FlatSizeSkipDim(shape, skip_dim);
}

/// supporting functions

static unsigned Offset(const RuntimeShape &shape, unsigned batch, unsigned y, unsigned x, unsigned channel) {
	assert(shape.size()==4);
	return	batch*shape.Dims(1)*shape.Dims(2)*shape.Dims(3) +
		y*shape.Dims(2)*shape.Dims(3) +
		x*shape.Dims(3) +
		channel;
	//return (((batch*Dim(1))+y)*Dim(2)+x)*shape.Dim(3)+channel;
}
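
// Worked example (illustrative; OffsetExample is a hypothetical name): Offset
// computes the flat NHWC index, so for shape {2, 4, 5, 3} the element at
// (batch=1, y=2, x=3, channel=1) lands at 1*60 + 2*15 + 3*3 + 1 == 100.
static inline void OffsetExample() {
	RuntimeShape shape(2, 4, 5, 3);
	assert(Offset(shape, 1, 2, 3, 1) == 1u*4*5*3 + 2u*5*3 + 3u*3 + 1u);
}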

inline float ActivationFunctionWithMinMax(float x, float output_activation_min,
                                          float output_activation_max) {
  return std::min(std::max(x, output_activation_min), output_activation_max);
}

/// parameter structures that operators accept

// from: tensorflow/lite/kernels/internal/types.h
struct ConvParams { // <= BuiltinOptions_Conv2DOptions = 1
  PaddingType padding_type; // unused
  PaddingValues padding_values;
  // TODO(starka): This was just "stride", so check that width+height is OK.
  int16 stride_width;
  int16 stride_height;
  int16 dilation_width_factor;
  int16 dilation_height_factor;
  // uint8 inference params.
  // TODO(b/65838351): Use smaller types if appropriate.
  int32 input_offset; // unused
  int32 weights_offset; // unused
  int32 output_offset; // unused
  int32 output_multiplier; // unused
  int output_shift; // unused
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused
  int32 quantized_activation_max; // unused
  // float activation params.
  float float_activation_min;
  float float_activation_max;
};

// from: tensorflow/lite/kernels/internal/types.h
struct DepthwiseParams { // <= BuiltinOptions_DepthwiseConv2DOptions = 2
  PaddingType padding_type; // unused
  PaddingValues padding_values;
  int16 stride_width;
  int16 stride_height;
  int16 dilation_width_factor;
  int16 dilation_height_factor;
  int16 depth_multiplier;
  // uint8 inference params.
  // TODO(b/65838351): Use smaller types if appropriate.
  int32 input_offset; // unused
  int32 weights_offset; // unused
  int32 output_offset; // unused
  int32 output_multiplier; // unused
  int output_shift; // unused
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused
  int32 quantized_activation_max; // unused
  float activation_params; // unused
  float float_activation_min;
  float float_activation_max;
};

struct FullyConnectedParams { // <= BuiltinOptions_FullyConnectedOptions = 8
  // uint8 inference params.
  // TODO(b/65838351): Use smaller types if appropriate.
  int32 input_offset; // unused
  int32 weights_offset; // unused
  int32 output_offset; // unused
  int32 output_multiplier; // unused
  int output_shift; // unused
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused
  int32 quantized_activation_max; // unused
  // float activation params.
  float float_activation_min;
  float float_activation_max;
  //FullyConnectedWeightsFormat weights_format; // DEFAULT, SHUFFLED4x16INT8
};

// from: tensorflow/lite/kernels/internal/types.h
struct PoolParams { // <= BuiltinOptions_Pool2DOptions = 5
  FusedActivationFunctionType activation; // unused
  PaddingType padding_type; // unused
  PaddingValues padding_values;
  int stride_height;
  int stride_width;
  int filter_height;
  int filter_width;
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused
  int32 quantized_activation_max; // unused
  // float activation params.
  float float_activation_min;
  float float_activation_max;
};

// from: tensorflow/lite/kernels/internal/types.h
// For Add, Sub, Mul ops.
struct ArithmeticParams { // <= BuiltinOptions_AddOptions = 11 / <= BuiltinOptions_MulOptions = 21 / <= BuiltinOptions_SubOptions = 28 / <= BuiltinOptions_DivOptions = 29
  // Shape dependent / common to data / op types.
  //BroadcastableOpCategory broadcast_category; // unused
  // uint8 inference params.
  int32 input1_offset; // unused
  int32 input2_offset; // unused
  int32 output_offset; // unused
  int32 output_multiplier; // unused
  int output_shift; // unused
  // Add / Sub, not Mul, uint8 inference params.
  int left_shift; // unused
  int32 input1_multiplier; // unused
  int input1_shift; // unused
  int32 input2_multiplier; // unused
  int input2_shift; // unused
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused (only used in the integer versions)
  int32 quantized_activation_max; // unused
  // float activation params.
  float float_activation_min;
  float float_activation_max;

  // Processed output dimensions.
  // Let input "a" be the one that broadcasts in the faster-changing dimension.
  // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and
  // {b0, b1, b2, b3, b4},
  // broadcast_shape[4] = b0 = a0.
  // broadcast_shape[3] = b1; a1 = 1.
  // broadcast_shape[2] = b2 = a2.
  // broadcast_shape[1] = a3; b3 = 1.
  // broadcast_shape[0] = b4 = a4.
  int broadcast_shape[5];
};

// from: tensorflow/lite/kernels/internal/types.h
struct SoftmaxParams { // <= BuiltinOptions_SoftmaxOptions = 9
  // beta is not really used (not a Tensorflow parameter) and not implemented
  // for LogSoftmax.
  double beta;
  // uint8 inference params.  Used even when beta defaults to 1.0.
  int32 input_multiplier; // unused
  int32 input_left_shift; // unused
  // Reverse scaling is only used by LogSoftmax.
  int32 reverse_scaling_divisor; // unused
  int32 reverse_scaling_right_shift; // unused
  int diff_min; // unused
};

// from kernels/internal/types.h
struct ResizeBilinearParams {
  bool align_corners;
};

struct ResizeNearestNeighborParams {
  bool align_corners;
};

struct LocalResponseNormalizationParams {
  int32 range;
  double bias;
  double alpha;
  double beta;
};

struct MeanParams {
  int8 axis_count;
  int16 axis[4];
};

// from tensorflow/lite/kernels/internal/types.h

enum class ResizingCategory : uint8 {
  kNone,
  kImageStyle,  // 4D, operating on inner dimensions, say {0, a, b, 0}.
  kGenericResize,
};

struct PadParams {
  int8 left_padding_count;
  int32 left_padding[4];
  int8 right_padding_count;
  int32 right_padding[4];
  ResizingCategory resizing_category; // unused
};


/// operator code

// from: tensorflow/lite/kernels/internal/reference/conv.h
inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                 const float* input_data, const RuntimeShape& filter_shape,
                 const float* filter_data, const RuntimeShape& bias_shape,
                 const float* bias_data, const RuntimeShape& output_shape,
                 float* output_data, const RuntimeShape& im2col_shape,
                 float* im2col_data) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  //const float output_activation_min = params.float_activation_min;
  //const float output_activation_max = params.float_activation_max;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  (void)im2col_data;   // only used in optimized code.
  (void)im2col_shape;  // only used in optimized code.
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          const int in_x_origin = (out_x * stride_width) - pad_width;
          const int in_y_origin = (out_y * stride_height) - pad_height;
          float total = 0.f;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // If the location is outside the bounds of the input image,
                // use zero as a default value.
                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height)) {
                  float input_value = input_data[Offset(
                      input_shape, batch, in_y, in_x, in_channel)];
                  float filter_value =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  total += (input_value * filter_value);
                }
              }
            }
          }
          float bias_value = 0.0f;
          if (bias_data) {
            bias_value = bias_data[out_channel];
          }
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              // DISABLE ActivationFunctionWithMinMax
              //ActivationFunctionWithMinMax(total + bias_value,
              //                             output_activation_min,
              //                             output_activation_max);

              // INSTEAD
              total + bias_value;
        }
      }
    }
  }
}
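
// Minimal usage sketch (assumed example values, not from the original file):
// a 3x3 single-channel input convolved with one 2x2 filter, stride 1, no
// padding, yields a 2x2 output; the bias is omitted by passing nullptr.
// ConvExample is a hypothetical name used only for illustration.
static inline void ConvExample() {
	ConvParams params{};
	params.stride_width = 1;
	params.stride_height = 1;
	params.dilation_width_factor = 1;
	params.dilation_height_factor = 1;
	const float input[9]  = {1, 2, 3, 4, 5, 6, 7, 8, 9}; // NHWC 1x3x3x1
	const float filter[4] = {1, 0, 0, 1};                // OHWI 1x2x2x1
	float output[4] = {0, 0, 0, 0};                      // NHWC 1x2x2x1
	Conv(params,
	     RuntimeShape(1, 3, 3, 1), input,
	     RuntimeShape(1, 2, 2, 1), filter,
	     RuntimeShape(0), nullptr,
	     RuntimeShape(1, 2, 2, 1), output,
	     RuntimeShape(0), nullptr);
	assert(output[0] == 1.f*1.f + 5.f*1.f); // top-left 2x2 window of the input
}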

// from: tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h
inline void DepthwiseConv(
    const DepthwiseParams& params, const RuntimeShape& input_shape,
    const float* input_data, const RuntimeShape& filter_shape,
    const float* filter_data, const RuntimeShape& bias_shape,
    const float* bias_data, const RuntimeShape& output_shape,
    float* output_data) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int depth_multiplier = params.depth_multiplier;
  //const float output_activation_min = params.float_activation_min;
  //const float output_activation_max = params.float_activation_max;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int input_depth = input_shape.Dims(3);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

  for (int b = 0; b < batches; ++b) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int ic = 0; ic < input_depth; ++ic) {
          for (int m = 0; m < depth_multiplier; m++) {
            const int oc = m + ic * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - pad_width;
            const int in_y_origin = (out_y * stride_height) - pad_height;
            float total = 0.f;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // If the location is outside the bounds of the input image,
                // use zero as a default value.
                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height)) {
                  float input_value =
                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
                  float filter_value = filter_data[Offset(
                      filter_shape, 0, filter_y, filter_x, oc)];
                  total += (input_value * filter_value);
                }
              }
            }
            float bias_value = 0.0f;
            if (bias_data) {
              bias_value = bias_data[oc];
            }
            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
                // DISABLE ActivationFunctionWithMinMax
                //ActivationFunctionWithMinMax(total + bias_value,
                //                             output_activation_min,
                //                             output_activation_max);

                // INSTEAD
                total + bias_value;
          }
        }
      }
    }
  }
}
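
// Minimal usage sketch (assumed example values; DepthwiseConvExample is a
// hypothetical name): with depth_multiplier == 1 a 1x1 depthwise filter simply
// scales each input channel independently (oc = m + ic * depth_multiplier).
static inline void DepthwiseConvExample() {
	DepthwiseParams params{};
	params.stride_width = 1;
	params.stride_height = 1;
	params.dilation_width_factor = 1;
	params.dilation_height_factor = 1;
	params.depth_multiplier = 1;
	const float input[2]  = {3.f, 4.f};    // NHWC 1x1x1x2
	const float filter[2] = {10.f, 100.f}; // 1HWC 1x1x1x2
	const float bias[2]   = {0.f, 0.f};
	float output[2] = {0.f, 0.f};          // NHWC 1x1x1x2
	DepthwiseConv(params,
	              RuntimeShape(1, 1, 1, 2), input,
	              RuntimeShape(1, 1, 1, 2), filter,
	              RuntimeShape(2), bias,
	              RuntimeShape(1, 1, 1, 2), output);
	assert(output[0] == 30.f && output[1] == 400.f);
}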

// from: tensorflow/lite/kernels/internal/reference/fully_connected.h
inline void FullyConnected(
    const FullyConnectedParams& params, const RuntimeShape& input_shape,
    const float* input_data, const RuntimeShape& weights_shape,
    const float* weights_data, const RuntimeShape& bias_shape,
    const float* bias_data, const RuntimeShape& output_shape,
    float* output_data) {
//  const float output_activation_min = params.float_activation_min;
//  const float output_activation_max = params.float_activation_max;
  // TODO(benoitjacob): This really should be:
  //     const int batches = ArraySize(output_dims, 1);
  // but the current --variable_batch hack consists in overwriting the 3rd
  // dimension with the runtime batch size, as we don't keep track for each
  // array of which dimension is the batch dimension in it.
  const int output_dims_count = output_shape.DimensionsCount();
  const int weights_dims_count = weights_shape.DimensionsCount();
  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
                                       output_shape, output_dims_count - 1);
  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      float total = 0.f;
      for (int d = 0; d < accum_depth; ++d) {
        total += input_data[b * accum_depth + d] *
                 weights_data[out_c * accum_depth + d];
      }
      float bias_value = 0.0f;
      if (bias_data) {
        bias_value = bias_data[out_c];
      }

      // DISABLE ActivationFunctionWithMinMax
      //output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
      //    total + bias_value, output_activation_min, output_activation_max);

      // INSTEAD
      output_data[out_c + output_depth * b] = total + bias_value;
    }
  }
}
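
// Minimal usage sketch (assumed example values; FullyConnectedExample is a
// hypothetical name): y = W * x + b with W laid out as
// [output_depth, accum_depth] = [2, 3], a single batch, and bias {10, 20}.
static inline void FullyConnectedExample() {
	FullyConnectedParams params{};
	const float input[3]   = {1.f, 2.f, 3.f};  // 1x3
	const float weights[6] = {1.f, 0.f, 0.f,   // output row 0
	                          0.f, 1.f, 1.f};  // output row 1
	const float bias[2]    = {10.f, 20.f};
	float output[2] = {0.f, 0.f};              // 1x2
	FullyConnected(params,
	               RuntimeShape(1, 3), input,
	               RuntimeShape(2, 3), weights,
	               RuntimeShape(2), bias,
	               RuntimeShape(1, 2), output);
	assert(output[0] == 1.f + 10.f);       // 1*1 + 10
	assert(output[1] == 2.f + 3.f + 20.f); // 2 + 3 + 20
}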

// from: tensorflow/lite/kernels/internal/reference/pooling.h
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
                    const float* input_data, const RuntimeShape& output_shape,
                    float* output_data) {
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          float max = std::numeric_limits<float>::lowest();
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              max = std::max(
                  max,
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
            }
          }
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              // DISABLE ActivationFunctionWithMinMax
              //ActivationFunctionWithMinMax(max, params.float_activation_min,
              //                             params.float_activation_max);

              // INSTEAD
              max;
        }
      }
    }
  }
}

// from: tensorflow/lite/kernels/internal/reference/pooling.h
inline void AveragePool(const PoolParams& params,
                        const RuntimeShape& input_shape,
                        const float* input_data,
                        const RuntimeShape& output_shape, float* output_data) {
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          float total = 0.f;
          float filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              total +=
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              filter_count++;
            }
          }
          const float average = total / filter_count;
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              // DISABLE ActivationFunctionWithMinMax
              //ActivationFunctionWithMinMax(average, params.float_activation_min,
              //                             params.float_activation_max);

              // INSTEAD
              average;
        }
      }
    }
  }
}
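
// Minimal usage sketch (assumed example values; AveragePoolExample is a
// hypothetical name): a 2x2 average pool with stride 2 and no padding reduces
// a 1x2x2x1 input to a single value, the mean of the four inputs.
static inline void AveragePoolExample() {
	PoolParams params{};
	params.stride_height = 2;
	params.stride_width = 2;
	params.filter_height = 2;
	params.filter_width = 2;
	const float input[4] = {1.f, 2.f, 3.f, 6.f}; // NHWC 1x2x2x1
	float output[1] = {0.f};                     // NHWC 1x1x1x1
	AveragePool(params,
	            RuntimeShape(1, 2, 2, 1), input,
	            RuntimeShape(1, 1, 1, 1), output);
	assert(output[0] == (1.f + 2.f + 3.f + 6.f) / 4.f);
}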

// from: tensorflow/lite/kernels/internal/reference/softmax.h
inline void Softmax(const SoftmaxParams& params,
                    const RuntimeShape& input_shape, const float* input_data,
                    const RuntimeShape& output_shape, float* output_data) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i) {
    // Find max element value which we'll use to ensure numerical stability
    // taking advantage of the following equality:
    // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
    float max = std::numeric_limits<float>::lowest();
    for (int c = 0; c < depth; ++c) {
      max = std::max(max, input_data[i * depth + c]);
    }

    // Compute sum.
    float sum = 0.f;
    for (int c = 0; c < depth; ++c) {
      sum += std::exp((input_data[i * depth + c] - max) * params.beta);
    }

    // Compute result.
    for (int c = 0; c < depth; ++c) {
      output_data[i * depth + c] =
          std::exp((input_data[i * depth + c] - max) * params.beta) / sum;
    }
  }
}
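
// Usage sketch (assumed example values; SoftmaxExample is a hypothetical name):
// for one row {0, ln 2, ln 2} with beta == 1 the exponentials are {1, 2, 2},
// so the result is approximately {0.2, 0.4, 0.4}; subtracting the row maximum
// before exponentiating changes nothing because of the identity quoted in the
// comment above, and the outputs always sum to 1.
static inline void SoftmaxExample() {
	SoftmaxParams params{};
	params.beta = 1.0;
	const float input[3] = {0.f, std::log(2.f), std::log(2.f)}; // one row, depth 3
	float output[3] = {0.f, 0.f, 0.f};
	Softmax(params, RuntimeShape(1, 3), input, RuntimeShape(1, 3), output);
	assert(std::fabs(output[0] + output[1] + output[2] - 1.f) < 1e-6f);
}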

// from kernels/internal/reference/reference_ops.h
template <typename T>
inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
                           const RuntimeShape& unextended_input_shape,
                           const T* input_data,
                           const RuntimeShape& unextended_output_size_shape,
                           const int32* output_size_data,
                           const RuntimeShape& unextended_output_shape,
                           T* output_data) {
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_size_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
  int32 input_height = input_shape.Dims(1);
  int32 input_width = input_shape.Dims(2);
  int32 depth = MatchingDim(input_shape, 3, output_shape, 3);

  TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1);
  TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1);
  TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1);
  TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2);
  int32 output_height = output_size_data[Offset(output_size_shape, 0, 0, 0, 0)];
  int32 output_width = output_size_data[Offset(output_size_shape, 0, 0, 0, 1)];

  float height_scale = static_cast<float>(input_height) / output_height;
  float width_scale = static_cast<float>(input_width) / output_width;
  if (op_params.align_corners && output_height > 1) {
    height_scale = static_cast<float>(input_height - 1) / (output_height - 1);
  }
  if (op_params.align_corners && output_width > 1) {
    width_scale = static_cast<float>(input_width - 1) / (output_width - 1);
  }

  for (int b = 0; b < batches; ++b) {
    for (int y = 0; y < output_height; ++y) {
      float input_y = y * height_scale;
      int32 y0 = static_cast<int32>(std::floor(input_y));
      int32 y1 = std::min(y0 + 1, input_height - 1);
      for (int x = 0; x < output_width; ++x) {
        float input_x = x * width_scale;
        int32 x0 = static_cast<int32>(std::floor(input_x));
        int32 x1 = std::min(x0 + 1, input_width - 1);
        for (int c = 0; c < depth; ++c) {
          T interpolation =
              static_cast<T>(input_data[Offset(input_shape, b, y0, x0, c)] *
                                 (1 - (input_y - y0)) * (1 - (input_x - x0)) +
                             input_data[Offset(input_shape, b, y1, x0, c)] *
                                 (input_y - y0) * (1 - (input_x - x0)) +
                             input_data[Offset(input_shape, b, y0, x1, c)] *
                                 (1 - (input_y - y0)) * (input_x - x0) +
                             input_data[Offset(input_shape, b, y1, x1, c)] *
                                 (input_y - y0) * (input_x - x0));
          output_data[Offset(output_shape, b, y, x, c)] = interpolation;
        }
      }
    }
  }
}
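
// Minimal usage sketch (assumed example values; ResizeBilinearExample is a
// hypothetical name): upscaling the 1x2 row {0, 2} to width 4 without
// align_corners uses width_scale == 0.5 and interpolates to {0, 1, 2, 2}
// (the last column clamps to the right edge). The output size is passed as a
// separate {1, 1, 1, 2} tensor holding {new_height, new_width}.
static inline void ResizeBilinearExample() {
	ResizeBilinearParams params{};
	params.align_corners = false;
	const float input[2] = {0.f, 2.f};      // NHWC 1x1x2x1
	const int32 output_size[2] = {1, 4};    // {new_height, new_width}
	float output[4] = {0.f, 0.f, 0.f, 0.f}; // NHWC 1x1x4x1
	ResizeBilinear(params,
	               RuntimeShape(1, 1, 2, 1), input,
	               RuntimeShape(1, 1, 1, 2), output_size,
	               RuntimeShape(1, 1, 4, 1), output);
	assert(output[0] == 0.f && output[1] == 1.f);
	assert(output[2] == 2.f && output[3] == 2.f);
}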

// from kernels/internal/reference/reference_ops.h
template <typename T>
inline void ResizeNearestNeighbor(
    const tflite::ResizeNearestNeighborParams& op_params,
    const RuntimeShape& unextended_input_shape, const T* input_data,
    const RuntimeShape& output_size_shape, const int32* output_size_data,
    const RuntimeShape& unextended_output_shape, T* output_data) {
  // Align corners = true is not supported.
  TFLITE_DCHECK(!op_params.align_corners);
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);

  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
  int32 input_height = input_shape.Dims(1);
  int32 input_width = input_shape.Dims(2);
  int32 depth = MatchingDim(input_shape, 3, output_shape, 3);

  // The Tensorflow version of this op allows resize on the width and height
  // axis only.
  TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
  int32 output_height = output_size_data[0];
  int32 output_width = output_size_data[1];

  // We use float to ensure agreement with the Tensorflow implementation.
  const float height_scale = static_cast<float>(input_height) / output_height;
  const float width_scale = static_cast<float>(input_width) / output_width;

  const int col_offset = input_shape.Dims(3);
  const int row_offset = input_shape.Dims(2) * col_offset;
  const int batch_offset = input_shape.Dims(1) * row_offset;

  const T* input_ptr = input_data;
  T* output_ptr = output_data;
  for (int b = 0; b < batches; ++b) {
    for (int y = 0; y < output_height; ++y) {
      int32 in_y = std::min(static_cast<int32>(std::floor(y * height_scale)),
                            input_height - 1);
      const T* y_input_ptr = input_ptr + in_y * row_offset;
      for (int x = 0; x < output_width; ++x) {
        int32 in_x = std::min(static_cast<int32>(std::floor(x * width_scale)),
                              input_width - 1);
        const T* x_input_ptr = y_input_ptr + in_x * col_offset;
        memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
        output_ptr += depth;
      }
    }
    input_ptr += batch_offset;
  }
}

// from: tensorflow/lite/kernels/internal/reference/reference_ops.h
inline void LocalResponseNormalization(
    const tflite::LocalResponseNormalizationParams& op_params,
    const RuntimeShape& input_shape, const float* input_data,
    const RuntimeShape& output_shape, float* output_data) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i) {
    for (int c = 0; c < depth; ++c) {
      const int begin_input_c = std::max(0, c - op_params.range);
      const int end_input_c = std::min(depth, c + op_params.range);
      float accum = 0.f;
      for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) {
        const float input_val = input_data[i * depth + input_c];
        accum += input_val * input_val;
      }
      const float multiplier =
          std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
      output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
    }
  }
}

// from: tensorflow/lite/kernels/internal/reference/reference_ops.h
template <typename T>
inline void Mean(const tflite::MeanParams& op_params,
                 const RuntimeShape& unextended_input_shape,
                 const T* input_data,
                 const RuntimeShape& unextended_output_shape, T* output_data) {
  //gemmlowp::ScopedProfilingLabel label("Mean4D");

  // Current implementation only supports dimension equals 4 and simultaneous
  // reduction over width and height.
  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  const int output_batch = output_shape.Dims(0);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int output_depth = output_shape.Dims(3);

  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);

  TFLITE_DCHECK_EQ(op_params.axis_count, 2);
  TFLITE_DCHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
                (op_params.axis[0] == 2 && op_params.axis[1] == 1));
  TFLITE_DCHECK_EQ(output_height, 1);
  TFLITE_DCHECK_EQ(output_width, 1);

  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_d = 0; out_d < output_depth; ++out_d) {
      float value = 0;
      for (int in_h = 0; in_h < input_height; ++in_h) {
        for (int in_w = 0; in_w < input_width; ++in_w) {
          value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
        }
      }
      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
          value / (input_width * input_height);
    }
  }
}
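
// Minimal usage sketch (assumed example values; MeanExample is a hypothetical
// name): reducing a 1x2x2x1 tensor over its height and width (axes 1 and 2)
// yields a single per-channel mean.
static inline void MeanExample() {
	MeanParams params{};
	params.axis_count = 2;
	params.axis[0] = 1;
	params.axis[1] = 2;
	const float input[4] = {1.f, 2.f, 3.f, 6.f}; // NHWC 1x2x2x1
	float output[1] = {0.f};                     // NHWC 1x1x1x1
	Mean(params,
	     RuntimeShape(1, 2, 2, 1), input,
	     RuntimeShape(1, 1, 1, 1), output);
	assert(output[0] == (1.f + 2.f + 3.f + 6.f) / 4.f);
}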

// from tensorflow/lite/kernels/internal/types.h

// from tensorflow/lite/kernels/internal/reference/pad.h (TF V2)

// TFLite Pad supports activation tensors with up to 4 dimensions.
constexpr int PadKernelMaxDimensionCount() { return 4; }

template <typename T, typename P>
inline void PadImpl(const tflite::PadParams& op_params,
                    const RuntimeShape& input_shape, const T* input_data,
                    const P* pad_value_ptr, const RuntimeShape& output_shape,
                    T* output_data) {
  const RuntimeShape ext_input_shape =
      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
  const RuntimeShape ext_output_shape =
      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
  TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
  TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());

  // Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
  // pad them to 4 dims (yes, we are "padding the padding").
  int left_padding_copy[PadKernelMaxDimensionCount()];
  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
    left_padding_copy[i] = 0;
  }
  for (int i = 0; i < op_params.left_padding_count; ++i) {
    left_padding_copy[i + PadKernelMaxDimensionCount() -
                      op_params.left_padding_count] = op_params.left_padding[i];
  }
  int right_padding_copy[PadKernelMaxDimensionCount()];
  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
    right_padding_copy[i] = 0;
  }
  for (int i = 0; i < op_params.right_padding_count; ++i) {
    right_padding_copy[i + PadKernelMaxDimensionCount() -
                       op_params.right_padding_count] =
        op_params.right_padding[i];
  }

  const int output_batch = ext_output_shape.Dims(0);
  const int output_height = ext_output_shape.Dims(1);
  const int output_width = ext_output_shape.Dims(2);
  const int output_depth = ext_output_shape.Dims(3);

  const int left_b_padding = left_padding_copy[0];
  const int left_h_padding = left_padding_copy[1];
  const int left_w_padding = left_padding_copy[2];
  const int left_d_padding = left_padding_copy[3];

  const int right_b_padding = right_padding_copy[0];
  const int right_h_padding = right_padding_copy[1];
  const int right_w_padding = right_padding_copy[2];
  const int right_d_padding = right_padding_copy[3];

  const T pad_value = *pad_value_ptr;

  const T* in_ptr = input_data;
  T* out_ptr = output_data;
  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_h = 0; out_h < output_height; ++out_h) {
      for (int out_w = 0; out_w < output_width; ++out_w) {
        for (int out_d = 0; out_d < output_depth; ++out_d) {
          if (out_b < left_b_padding ||
              out_b >= output_batch - right_b_padding ||
              out_h < left_h_padding ||
              out_h >= output_height - right_h_padding ||
              out_w < left_w_padding ||
              out_w >= output_width - right_w_padding ||
              out_d < left_d_padding ||
              out_d >= output_depth - right_d_padding) {
            *out_ptr++ = pad_value;
          } else {
            *out_ptr++ = *in_ptr++;
          }
        }
      }
    }
  }
}

template <typename T, typename P>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const T* input_data,
                const P* pad_value_ptr, const RuntimeShape& output_shape,
                T* output_data) {
  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
          output_data);
}

template <typename T>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const T* input_data,
                const int32* pad_value_ptr, const RuntimeShape& output_shape,
                T* output_data) {
  const T converted_pad_value = static_cast<T>(*pad_value_ptr);
  PadImpl(op_params, input_shape, input_data, &converted_pad_value,
          output_shape, output_data);
}

template <>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const int32* input_data,
                const int32* pad_value_ptr, const RuntimeShape& output_shape,
                int32* output_data) {
  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
          output_data);
}
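
// Minimal usage sketch (assumed example values; PadExample is a hypothetical
// name): padding the 1x1x2x1 row {5, 7} with one zero on each side of the
// width dimension (index 2 of the 4-element padding arrays) gives {0, 5, 7, 0}.
static inline void PadExample() {
	PadParams params{};
	params.left_padding_count = 4;
	params.right_padding_count = 4;
	params.left_padding[2] = 1;  // one element of padding before the width axis
	params.right_padding[2] = 1; // one element of padding after the width axis
	const float input[2] = {5.f, 7.f};          // NHWC 1x1x2x1
	const float pad_value = 0.f;
	float output[4] = {-1.f, -1.f, -1.f, -1.f}; // NHWC 1x1x4x1
	Pad(params, RuntimeShape(1, 1, 2, 1), input, &pad_value,
	    RuntimeShape(1, 1, 4, 1), output);
	assert(output[0] == 0.f && output[1] == 5.f);
	assert(output[2] == 7.f && output[3] == 0.f);
}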

template <typename T, typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape, const T* input_data,
                          const P* pad_value_ptr,
                          const RuntimeShape& output_shape, T* output_data) {
  //TFLITE_ASSERT_FALSE;
}

template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape,
                          const uint8* input_data, const P* pad_value_ptr,
                          const RuntimeShape& output_shape,
                          uint8* output_data) {
  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
      output_data);
}

template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape,
                          const int8_t* input_data, const P* pad_value_ptr,
                          const RuntimeShape& output_shape,
                          int8_t* output_data) {
  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
      output_data);
}

template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape,
                          const float* input_data, const P* pad_value_ptr,
                          const RuntimeShape& output_shape,
                          float* output_data) {
  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
      output_data);
}

} // namespace tflite

//
// exporting this functionality by wrapping it in our API
//

#include "../../tensor.h"

namespace NnOperators {

void Conv2D(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &filterShape, const float *filterData,
	const TensorShape &biasShape, const float *biasData,
	const TensorShape &outputShape, float *outputData,
	unsigned paddingWidth, unsigned paddingHeight,
	unsigned strideWidth, unsigned strideHeight,
	unsigned dilationWidthFactor, unsigned dilationHeightFactor
) {
	tflite::ConvParams params;
	params.padding_values.width = paddingWidth;
	params.padding_values.height = paddingHeight;
	params.stride_width = strideWidth;
	params.stride_height = strideHeight;
	params.dilation_width_factor = dilationWidthFactor;
	params.dilation_height_factor = dilationHeightFactor;

	tflite::Conv(params,
		tflite::RuntimeShape(inputShape),  inputData,
		tflite::RuntimeShape(filterShape), filterData,
		tflite::RuntimeShape(biasShape),   biasData,
		tflite::RuntimeShape(outputShape), outputData,
		tflite::RuntimeShape(0),
		nullptr
	);
}
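
// Illustrative helper (an assumption, not part of this wrapper API): callers of
// Conv2D above must supply per-side padding themselves. For TensorFlow-style
// SAME padding it can be derived from the input size, filter size, stride and
// dilation as pad = max(0, (out - 1) * stride + dilated_filter - in) / 2 with
// out = ceil(in / stride). SamePaddingOneSide is a hypothetical name.
static inline unsigned SamePaddingOneSide(unsigned inSize, unsigned filterSize, unsigned stride, unsigned dilation) {
	unsigned outSize = (inSize + stride - 1) / stride;
	unsigned dilatedFilter = (filterSize - 1) * dilation + 1;
	int totalPad = int((outSize - 1) * stride + dilatedFilter) - int(inSize);
	return totalPad > 0 ? unsigned(totalPad) / 2 : 0;
}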

void DepthwiseConv2D(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &filterShape, const float *filterData,
	const TensorShape &biasShape, const float *biasData,
	const TensorShape &outputShape, float *outputData,
	unsigned paddingWidth, unsigned paddingHeight,
	unsigned strideWidth, unsigned strideHeight,
	unsigned dilationWidthFactor, unsigned dilationHeightFactor,
	unsigned depthMultiplier
) {
	tflite::DepthwiseParams params;
	params.padding_values.width = paddingWidth;
	params.padding_values.height = paddingHeight;
	params.stride_width = strideWidth;
	params.stride_height = strideHeight;
	params.dilation_width_factor = dilationWidthFactor;
	params.dilation_height_factor = dilationHeightFactor;
	params.depth_multiplier = depthMultiplier;

	tflite::DepthwiseConv(params,
		tflite::RuntimeShape(inputShape),  inputData,
		tflite::RuntimeShape(filterShape), filterData,
		tflite::RuntimeShape(biasShape),   biasData,
		tflite::RuntimeShape(outputShape), outputData
	);
}

void FullyConnected(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &filterShape, const float *filterData,
	const TensorShape &biasShape, const float *biasData,
	const TensorShape &outputShape, float *outputData
) {
	tflite::FullyConnectedParams params;

	tflite::FullyConnected(params,
		tflite::RuntimeShape(inputShape),  inputData,
		tflite::RuntimeShape(filterShape), filterData,
		tflite::RuntimeShape(biasShape),   biasData,
		tflite::RuntimeShape(outputShape), outputData
	);
}

void MaxPool(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &outputShape, float *outputData,
	unsigned paddingWidth, unsigned paddingHeight,
	unsigned strideWidth, unsigned strideHeight,
	unsigned filterWidth, unsigned filterHeight
) {
	tflite::PoolParams params;
	params.padding_values.width = paddingWidth;
	params.padding_values.height = paddingHeight;
	params.stride_width = strideWidth;
	params.stride_height = strideHeight;
	params.filter_width = filterWidth;
	params.filter_height = filterHeight;

	tflite::MaxPool(params,
		tflite::RuntimeShape(inputShape),  inputData,
		tflite::RuntimeShape(outputShape), outputData
	);
}

void AveragePool(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &outputShape, float *outputData,
	unsigned paddingWidth, unsigned paddingHeight,
	unsigned strideWidth, unsigned strideHeight,
	unsigned filterWidth, unsigned filterHeight
) {
	tflite::PoolParams params;
	params.padding_values.width = paddingWidth;
	params.padding_values.height = paddingHeight;
	params.stride_width = strideWidth;
	params.stride_height = strideHeight;
	params.filter_width = filterWidth;
	params.filter_height = filterHeight;

	tflite::AveragePool(params,
		tflite::RuntimeShape(inputShape),  inputData,
		tflite::RuntimeShape(outputShape), outputData
	);
}

void Softmax(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &outputShape, float *outputData,
	float beta
) {
	tflite::SoftmaxParams params;
	params.beta = beta;

	tflite::Softmax(params,
		tflite::RuntimeShape(inputShape),  inputData,
		tflite::RuntimeShape(outputShape), outputData
	);
}

void ResizeBilinear(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &outputShape, float *outputData,
	bool alignCorners
) {
	tflite::ResizeBilinearParams params;
	params.align_corners = alignCorners;

	// the reference op takes the output size as a separate {1,1,1,2} tensor (not sure why), so synthesize it from the output shape
	tflite::RuntimeShape outputSizeDims = {1, 1, 1, 2};
	tflite::int32 outputSizeData[2] = {(tflite::int32)outputShape[1/*height*/], (tflite::int32)outputShape[2/*width*/]};

	tflite::ResizeBilinear(params,
		tflite::RuntimeShape(inputShape),  inputData,
		outputSizeDims, outputSizeData,
		tflite::RuntimeShape(outputShape), outputData
	);
}

void ResizeNearestNeighbor(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &outputShape, float *outputData,
	bool alignCorners
) {
	tflite::ResizeNearestNeighborParams params;
	params.align_corners = alignCorners;

	// the reference op takes the output size as a separate {1,1,1,2} tensor (not sure why), so synthesize it from the output shape
	tflite::RuntimeShape outputSizeDims = {1, 1, 1, 2};
	tflite::int32 outputSizeData[2] = {(tflite::int32)outputShape[1/*height*/], (tflite::int32)outputShape[2/*width*/]};

	tflite::ResizeNearestNeighbor(params,
		tflite::RuntimeShape(inputShape),  inputData,
		outputSizeDims, outputSizeData,
		tflite::RuntimeShape(outputShape), outputData
	);
}

void LocalResponseNormalization(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &outputShape, float *outputData,
	int radius, float alpha, float beta, float bias
) {
	tflite::LocalResponseNormalizationParams params;
	params.range = radius; // XXX in TF Lite sources the operator option is called "radius" but the parameter in the structure is called "range"
	params.alpha = alpha;
	params.beta = beta;
	params.bias = bias;

	tflite::LocalResponseNormalization(params,
		tflite::RuntimeShape(inputShape),  inputData,
		tflite::RuntimeShape(outputShape), outputData
	);
}

void Mean(
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &outputShape, float *outputData,
	const int32_t *axis, unsigned axis_count
) {
	tflite::MeanParams params;
	params.axis_count = axis_count;
	std::copy(axis, axis+axis_count, params.axis);

	tflite::Mean<float>(params,
		tflite::RuntimeShape(inputShape),  inputData,
		tflite::RuntimeShape(outputShape), outputData
	);
}

void Pad(
	const std::array<int32_t,2>* paddings,
	const TensorShape &inputShape, const float *inputData,
	const TensorShape &outputShape, float *outputData
) {
	tflite::PadParams params;
	params.left_padding_count = inputShape.size();
	params.right_padding_count = params.left_padding_count;
	for (unsigned i = 0; i < params.left_padding_count; i++, paddings++) {
		params.left_padding[i] = (*paddings)[0];
		params.right_padding[i] = (*paddings)[1];
	}

	float padValue = 0;

	tflite::Pad(params,
		tflite::RuntimeShape(inputShape),  inputData,
		&padValue,
		tflite::RuntimeShape(outputShape), outputData
	);
}

} // NnOperators