1 // Copyright (C) 2020 by Yuri Victorovich. All rights reserved.
2
3 //
4 // tflite-reference-implementation.cpp contains portions of the Apache 2.0 licensed code from the TensorFlow source tree
5 //
6
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <cstdlib>  // abort(), used by TFLITE_ABORT
#include <cstring>
#include <limits>   // std::numeric_limits, used by MaxPool/AveragePool/Softmax
#include <vector>

#include <assert.h>
15
16 namespace tflite {
17
18 /// some common macros used in the code
19
// Debug-only checks: compiled down to standard assert(), so they vanish
// under NDEBUG. Arguments are fully parenthesized so expressions with
// lower-precedence operators (|, ?:, etc.) compare as intended, and the
// expansions carry no trailing semicolon so `if (c) TFLITE_DCHECK(x); else`
// parses correctly at the call site.
#define TFLITE_DCHECK(condition) assert(condition)
#define TFLITE_DCHECK_EQ(a,b) assert((a) == (b))
#define TFLITE_DCHECK_LE(a,b) assert((a) <= (b))
#define TFLITE_DCHECK_GE(a,b) assert((a) >= (b))
// Always-on checks: abort() even in release builds.
#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
#define TFLITE_ABORT abort()
27
/// common types used in this code

// Fixed-width integer aliases matching TensorFlow Lite's internal naming.
// `using` declarations instead of `typedef` — the file already relies on
// C++11 (`enum class` below), and `using` is the idiomatic modern form.
using int8 = int8_t;
using uint8 = uint8_t;
using int16 = int16_t;
using uint16 = uint16_t;
using int32 = int32_t;
using uint32 = uint32_t;

// Activation that may be fused into an op's output (clamping is disabled in
// this port, see the kernels below) and the convolution padding scheme.
enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu };
enum class PaddingType : uint8 { kNone, kSame, kValid };
39
40 struct PaddingValues {
41 int16 width;
42 int16 height;
43 // offset is used for calculating "remaining" padding, for example, `width`
44 // is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is
45 // 1 + 1 = 2.
46 int16 width_offset; // unused
47 // Same as width_offset except it's over the height dimension.
48 int16 height_offset; // unused
49
PaddingValuestflite::PaddingValues50 PaddingValues()
51 : width(0)
52 , height(0)
53 , width_offset(0)
54 , height_offset(0)
55 { }
56 };
57
// from: tensorflow/lite/kernels/internal/types.h
// Minimal stand-in for TFLite's RuntimeShape, backed by std::vector<unsigned>.
// Dimension 0 is the outermost dimension (batch for NHWC tensors).
class RuntimeShape : public std::vector<unsigned> {
 public:
  RuntimeShape() { }
  RuntimeShape(const std::vector<unsigned>& v) : std::vector<unsigned>(v) { }
  RuntimeShape(unsigned d1) : std::vector<unsigned>{d1} { }
  RuntimeShape(unsigned d1, unsigned d2) : std::vector<unsigned>{d1, d2} { }
  RuntimeShape(unsigned d1, unsigned d2, unsigned d3)
      : std::vector<unsigned>{d1, d2, d3} { }
  RuntimeShape(unsigned d1, unsigned d2, unsigned d3, unsigned d4)
      : std::vector<unsigned>{d1, d2, d3, d4} { }
  // Builds `shape` left-padded with `pad_value` up to `new_shape_size` dims.
  RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value) {
    assert(new_shape_size >= shape.DimensionsCount());
    reserve(new_shape_size);
    insert(end(), new_shape_size - shape.DimensionsCount(), pad_value);
    insert(end(), shape.begin(), shape.end());
  }

  void Resize(unsigned sz_) { resize(sz_); }
  void SetDim(unsigned dim, unsigned val) { (*this)[dim] = val; }
  unsigned Dims(unsigned n) const { return (*this)[n]; }
  int DimensionsCount() const { return size(); }
  // Total element count: the product of all dimensions (1 for a 0-d shape).
  int FlatSize() const {
    int product = 1;
    for (unsigned d : *this)
      product *= d;
    return product;
  }
  inline const unsigned* DimsData() const { return data(); }
  // Returns `shape` left-padded with 1s to `new_shape_size` dimensions.
  inline static RuntimeShape ExtendedShape(int new_shape_size,
                                           const RuntimeShape& shape) {
    return RuntimeShape(new_shape_size, shape, 1);
  }
};
92
MatchingDim(const RuntimeShape & shape1,unsigned dim1,const RuntimeShape & shape2,unsigned dim2)93 static unsigned MatchingDim(const RuntimeShape &shape1, unsigned dim1, const RuntimeShape &shape2, unsigned dim2) {
94 assert(shape1[dim1] == shape2[dim2]);
95 return shape1[dim1];
96 }
97
MatchingFlatSize(const RuntimeShape & shape,const RuntimeShape & check_shape_0)98 inline int MatchingFlatSize(const RuntimeShape& shape,
99 const RuntimeShape& check_shape_0) {
100 TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
101 const int dims_count = shape.DimensionsCount();
102 for (int i = 0; i < dims_count; ++i) {
103 TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
104 }
105 return shape.FlatSize();
106 }
107
MatchingFlatSize(const RuntimeShape & shape,const RuntimeShape & check_shape_0,const RuntimeShape & check_shape_1)108 inline int MatchingFlatSize(const RuntimeShape& shape,
109 const RuntimeShape& check_shape_0,
110 const RuntimeShape& check_shape_1) {
111 TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
112 const int dims_count = shape.DimensionsCount();
113 for (int i = 0; i < dims_count; ++i) {
114 TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
115 }
116 return MatchingFlatSize(shape, check_shape_1);
117 }
118
FlatSizeSkipDim(const RuntimeShape & shape,int skip_dim)119 inline int FlatSizeSkipDim(const RuntimeShape& shape, int skip_dim) {
120 const int dims_count = shape.DimensionsCount();
121 TFLITE_DCHECK(skip_dim >= 0 && skip_dim < dims_count);
122 const auto* dims_data = shape.DimsData();
123 int flat_size = 1;
124 for (int i = 0; i < dims_count; ++i) {
125 flat_size *= (i == skip_dim) ? 1 : dims_data[i];
126 }
127 return flat_size;
128 }
129
MatchingFlatSizeSkipDim(const RuntimeShape & shape,int skip_dim,const RuntimeShape & check_shape_0)130 inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim,
131 const RuntimeShape& check_shape_0) {
132 const int dims_count = shape.DimensionsCount();
133 for (int i = 0; i < dims_count; ++i) {
134 if (i != skip_dim) {
135 TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
136 }
137 }
138 return FlatSizeSkipDim(shape, skip_dim);
139 }
140
141 /// supporting functions
142
Offset(const RuntimeShape & shape,unsigned batch,unsigned y,unsigned x,unsigned channel)143 static unsigned Offset(const RuntimeShape &shape, unsigned batch, unsigned y, unsigned x, unsigned channel) {
144 assert(shape.size()==4);
145 return batch*shape.Dims(1)*shape.Dims(2)*shape.Dims(3) +
146 y*shape.Dims(2)*shape.Dims(3) +
147 x*shape.Dims(3) +
148 channel;
149 //return (((batch*Dim(1))+y)*Dim(2)+x)*shape.Dim(3)+channel;
150 }
151
// Clamps `x` into [output_activation_min, output_activation_max].
inline float ActivationFunctionWithMinMax(float x, float output_activation_min,
                                          float output_activation_max) {
  const float raised = std::max(x, output_activation_min);
  return std::min(raised, output_activation_max);
}
156
157 /// parameter structures that operators accept
158
// from: tensorflow/lite/kernels/internal/types.h
// Parameters for Conv() below (float path only). Fields marked "unused"
// belong to the quantized (uint8) inference path, which this port does not
// implement.
struct ConvParams { // <= BuiltinOptions_Conv2DOptions = 1
  PaddingType padding_type; // unused
  PaddingValues padding_values; // explicit left/top padding, see Conv()
  // TODO(starka): This was just "stride", so check that width+height is OK.
  int16 stride_width;
  int16 stride_height;
  int16 dilation_width_factor;  // spacing between filter taps; 1 = dense
  int16 dilation_height_factor; // spacing between filter taps; 1 = dense
  // uint8 inference params.
  // TODO(b/65838351): Use smaller types if appropriate.
  int32 input_offset; // unused
  int32 weights_offset; // unused
  int32 output_offset; // unused
  int32 output_multiplier; // unused
  int output_shift; // unused
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused
  int32 quantized_activation_max; // unused
  // float activation params. NOTE: currently ignored — Conv() deliberately
  // skips the fused-activation clamp.
  float float_activation_min;
  float float_activation_max;
};
182
// from: tensorflow/lite/kernels/internal/types.h
// Parameters for DepthwiseConv() below (float path only). Fields marked
// "unused" belong to the quantized path, which this port does not implement.
struct DepthwiseParams { // <= BuiltinOptions_DepthwiseConv2DOptions = 2
  PaddingType padding_type; // unused
  PaddingValues padding_values; // explicit left/top padding
  int16 stride_width;
  int16 stride_height;
  int16 dilation_width_factor;  // spacing between filter taps; 1 = dense
  int16 dilation_height_factor; // spacing between filter taps; 1 = dense
  // Output channels per input channel: output_depth = input_depth * this.
  int16 depth_multiplier;
  // uint8 inference params.
  // TODO(b/65838351): Use smaller types if appropriate.
  int32 input_offset; // unused
  int32 weights_offset; // unused
  int32 output_offset; // unused
  int32 output_multiplier; // unused
  int output_shift; // unused
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused
  int32 quantized_activation_max; // unused
  float activation_params; // unused
  // float activation params. NOTE: currently ignored — DepthwiseConv()
  // deliberately skips the fused-activation clamp.
  float float_activation_min;
  float float_activation_max;
};
206
// Parameters for FullyConnected() below (float path only). Fields marked
// "unused" belong to the quantized path, which this port does not implement.
struct FullyConnectedParams { // <= BuiltinOptions_FullyConnectedOptions = 8
  // uint8 inference params.
  // TODO(b/65838351): Use smaller types if appropriate.
  int32 input_offset; // unused
  int32 weights_offset; // unused
  int32 output_offset; // unused
  int32 output_multiplier; // unused
  int output_shift; // unused
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused
  int32 quantized_activation_max; // unused
  // float activation params. NOTE: currently ignored — FullyConnected()
  // deliberately skips the fused-activation clamp.
  float float_activation_min;
  float float_activation_max;
  //FullyConnectedWeightsFormat weights_format; // // DEFAULT,SHUFFLED4x16INT8
};
223
// from: tensorflow/lite/kernels/internal/types.h
// Parameters shared by MaxPool() and AveragePool() below (float path only).
struct PoolParams { // <= BuiltinOptions_Pool2DOptions = 5
  FusedActivationFunctionType activation; // unused
  PaddingType padding_type; // unused
  PaddingValues padding_values; // explicit left/top padding
  int stride_height;
  int stride_width;
  int filter_height; // pooling window height
  int filter_width;  // pooling window width
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused
  int32 quantized_activation_max; // unused
  // float activation params. NOTE: currently ignored — the pooling kernels
  // deliberately skip the fused-activation clamp.
  float float_activation_min;
  float float_activation_max;
};
240
// from: tensorflow/lite/kernels/internal/types.h
242 // For Add, Sub, Mul ops.
// Parameters shared by elementwise binary ops. Only the float activation
// fields are meaningful in this float-only port; the quantized fields are
// carried over from upstream unchanged.
struct ArithmeticParams { // <= BuiltinOptions_AddOptions = 11 / <= BuiltinOptions_MulOptions = 21 / <= BuiltinOptions_SubOptions = 28 / <= BuiltinOptions_DivOptions = 29
  // Shape dependent / common to data / op types.
  //BroadcastableOpCategory broadcast_category; // unused
  // uint8 inference params.
  int32 input1_offset; // unused
  int32 input2_offset; // unused
  int32 output_offset; // unused
  int32 output_multiplier; // unused
  int output_shift; // unused
  // Add / Sub, not Mul, uint8 inference params.
  int left_shift; // unused
  int32 input1_multiplier; // unused
  int input1_shift; // unused
  int32 input2_multiplier; // unused
  int input2_shift; // unused
  // uint8, etc, activation params.
  int32 quantized_activation_min; // unused (only used in integer versions)
  int32 quantized_activation_max; // unused
  // float activation params.
  float float_activation_min;
  float float_activation_max;

  // Processed output dimensions.
  // Let input "a" be the one that broadcasts in the faster-changing dimension.
  // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and
  // {b0, b1, b2, b3, b4},
  // broadcast_shape[4] = b0 = a0.
  // broadcast_shape[3] = b1; a1 = 1.
  // broadcast_shape[2] = b2 = a2.
  // broadcast_shape[1] = a3; b3 = 1.
  // broadcast_shape[0] = b4 = a4.
  int broadcast_shape[5];
};
276
// from: tensorflow/lite/kernels/internal/types.h
// Parameters for Softmax() below; only `beta` (the logit scale) is read by
// the float implementation in this file.
struct SoftmaxParams { // <= BuiltinOptions_SoftmaxOptions = 9
  // beta is not really used (not a Tensorflow parameter) and not implemented
  // for LogSoftmax.
  double beta;
  // uint8 inference params. Used even when beta defaults to 1.0.
  int32 input_multiplier; // unused
  int32 input_left_shift; // unused
  // Reverse scaling is only used by LogSoftmax.
  int32 reverse_scaling_divisor; // unused
  int32 reverse_scaling_right_shift; // unused
  int diff_min; // unused
};
290
// from kernels/internal/types.h

// Parameters for ResizeBilinear(). When `align_corners` is true the scale
// factors are computed as (in - 1) / (out - 1) so the corner samples of input
// and output coincide (see ResizeBilinear() below).
struct ResizeBilinearParams {
  bool align_corners;
};

// Parameters for ResizeNearestNeighbor(). NOTE: align_corners == true is
// rejected by the implementation in this file.
struct ResizeNearestNeighborParams {
  bool align_corners;
};

// Parameters for local response normalization. No LRN kernel is visible in
// this portion of the file — presumably consumed elsewhere; confirm.
struct LocalResponseNormalizationParams {
  int32 range;
  double bias;
  double alpha;
  double beta;
};

// Parameters for a Mean (reduction) op: the set of axes to reduce over.
struct MeanParams {
  int8 axis_count;  // number of valid entries in `axis`
  int16 axis[4];    // axes to reduce, up to 4
};
311
// from tensorflow/lite/kernels/internal/types.h

// Classification of a resize/pad operation's shape; upstream uses it to pick
// optimized paths. Unused by this reference port.
enum class ResizingCategory : uint8 {
  kNone,
  kImageStyle, // 4D, operating on inner dimensions, say {0, a, b, 0}.
  kGenericResize,
};

// Parameters for a Pad op: per-dimension padding inserted before
// (`left_padding`) and after (`right_padding`) the data, up to 4 dimensions.
struct PadParams {
  int8 left_padding_count;  // number of valid entries in `left_padding`
  int32 left_padding[4];
  int8 right_padding_count; // number of valid entries in `right_padding`
  int32 right_padding[4];
  ResizingCategory resizing_category; // unused
};
327
328
329 /// operator code
330
// from: tensorflow/lite/kernels/internal/reference/conv.h
// Float reference implementation of 2D convolution, NHWC layout:
//   input:  [batch, in_h, in_w, in_depth]
//   filter: [out_depth, filter_h, filter_w, in_depth]
//   bias:   [out_depth], optional (bias_data may be null)
//   output: [batch, out_h, out_w, out_depth]
// Input locations that fall outside the image (due to padding) contribute
// zero. The fused activation clamp is deliberately disabled in this port.
inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                 const float* input_data, const RuntimeShape& filter_shape,
                 const float* filter_data, const RuntimeShape& bias_shape,
                 const float* bias_data, const RuntimeShape& output_shape,
                 float* output_data, const RuntimeShape& im2col_shape,
                 float* im2col_data) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  //const float output_activation_min = params.float_activation_min;
  //const float output_activation_max = params.float_activation_max;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  (void)im2col_data;   // only used in optimized code.
  (void)im2col_shape;  // only used in optimized code.
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          // Top-left corner of the receptive field in input coordinates;
          // may be negative when padding is in effect.
          const int in_x_origin = (out_x * stride_width) - pad_width;
          const int in_y_origin = (out_y * stride_height) - pad_height;
          float total = 0.f;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // If the location is outside the bounds of the input image,
                // use zero as a default value.
                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height)) {
                  float input_value = input_data[Offset(
                      input_shape, batch, in_y, in_x, in_channel)];
                  float filter_value =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  total += (input_value * filter_value);
                }
              }
            }
          }
          float bias_value = 0.0f;
          if (bias_data) {
            bias_value = bias_data[out_channel];
          }
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              // DISABLE ActivationFunctionWithMinMax
              //ActivationFunctionWithMinMax(total + bias_value,
              //                             output_activation_min,
              //                             output_activation_max);

              // INSTEAD
              total + bias_value;
        }
      }
    }
  }
}
409
410 // from: tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h
DepthwiseConv(const DepthwiseParams & params,const RuntimeShape & input_shape,const float * input_data,const RuntimeShape & filter_shape,const float * filter_data,const RuntimeShape & bias_shape,const float * bias_data,const RuntimeShape & output_shape,float * output_data)411 inline void DepthwiseConv(
412 const DepthwiseParams& params, const RuntimeShape& input_shape,
413 const float* input_data, const RuntimeShape& filter_shape,
414 const float* filter_data, const RuntimeShape& bias_shape,
415 const float* bias_data, const RuntimeShape& output_shape,
416 float* output_data) {
417 const int stride_width = params.stride_width;
418 const int stride_height = params.stride_height;
419 const int dilation_width_factor = params.dilation_width_factor;
420 const int dilation_height_factor = params.dilation_height_factor;
421 const int pad_width = params.padding_values.width;
422 const int pad_height = params.padding_values.height;
423 const int depth_multiplier = params.depth_multiplier;
424 //const float output_activation_min = params.float_activation_min;
425 //const float output_activation_max = params.float_activation_max;
426 TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
427 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
428 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
429
430 const int batches = MatchingDim(input_shape, 0, output_shape, 0);
431 const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
432 const int input_height = input_shape.Dims(1);
433 const int input_width = input_shape.Dims(2);
434 const int input_depth = input_shape.Dims(3);
435 const int filter_height = filter_shape.Dims(1);
436 const int filter_width = filter_shape.Dims(2);
437 const int output_height = output_shape.Dims(1);
438 const int output_width = output_shape.Dims(2);
439 TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
440 TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
441
442 for (int b = 0; b < batches; ++b) {
443 for (int out_y = 0; out_y < output_height; ++out_y) {
444 for (int out_x = 0; out_x < output_width; ++out_x) {
445 for (int ic = 0; ic < input_depth; ++ic) {
446 for (int m = 0; m < depth_multiplier; m++) {
447 const int oc = m + ic * depth_multiplier;
448 const int in_x_origin = (out_x * stride_width) - pad_width;
449 const int in_y_origin = (out_y * stride_height) - pad_height;
450 float total = 0.f;
451 for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
452 for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
453 const int in_x = in_x_origin + dilation_width_factor * filter_x;
454 const int in_y =
455 in_y_origin + dilation_height_factor * filter_y;
456 // If the location is outside the bounds of the input image,
457 // use zero as a default value.
458 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
459 (in_y < input_height)) {
460 float input_value =
461 input_data[Offset(input_shape, b, in_y, in_x, ic)];
462 float filter_value = filter_data[Offset(
463 filter_shape, 0, filter_y, filter_x, oc)];
464 total += (input_value * filter_value);
465 }
466 }
467 }
468 float bias_value = 0.0f;
469 if (bias_data) {
470 bias_value = bias_data[oc];
471 }
472 output_data[Offset(output_shape, b, out_y, out_x, oc)] =
473 // DISABLE ActivationFunctionWithMinMax
474 // ActivationFunctionWithMinMax(total + bias_value,
475 // output_activation_min,
476 // output_activation_max);
477
478 // INSTEAD
479 total + bias_value;
480 }
481 }
482 }
483 }
484 }
485 }
486
487 // from: tensorflow/lite/kernels/internal/reference/fully_connected.h
FullyConnected(const FullyConnectedParams & params,const RuntimeShape & input_shape,const float * input_data,const RuntimeShape & weights_shape,const float * weights_data,const RuntimeShape & bias_shape,const float * bias_data,const RuntimeShape & output_shape,float * output_data)488 inline void FullyConnected(
489 const FullyConnectedParams& params, const RuntimeShape& input_shape,
490 const float* input_data, const RuntimeShape& weights_shape,
491 const float* weights_data, const RuntimeShape& bias_shape,
492 const float* bias_data, const RuntimeShape& output_shape,
493 float* output_data) {
494 // const float output_activation_min = params.float_activation_min;
495 // const float output_activation_max = params.float_activation_max;
496 // TODO(benoitjacob): This really should be:
497 // const int batches = ArraySize(output_dims, 1);
498 // but the current --variable_batch hack consists in overwriting the 3rd
499 // dimension with the runtime batch size, as we don't keep track for each
500 // array of which dimension is the batch dimension in it.
501 const int output_dims_count = output_shape.DimensionsCount();
502 const int weights_dims_count = weights_shape.DimensionsCount();
503 const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
504 const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
505 output_shape, output_dims_count - 1);
506 const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
507 for (int b = 0; b < batches; ++b) {
508 for (int out_c = 0; out_c < output_depth; ++out_c) {
509 float total = 0.f;
510 for (int d = 0; d < accum_depth; ++d) {
511 total += input_data[b * accum_depth + d] *
512 weights_data[out_c * accum_depth + d];
513 }
514 float bias_value = 0.0f;
515 if (bias_data) {
516 bias_value = bias_data[out_c];
517 }
518
519 // DISABLE ActivationFunctionWithMinMax
520 //output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
521 // total + bias_value, output_activation_min, output_activation_max);
522
523 // INSTEAD
524 output_data[out_c + output_depth * b] = total + bias_value;
525 }
526 }
527 }
528
// from: tensorflow/lite/kernels/internal/reference/pooling.h
// Float max pooling over an NHWC input. The filter window is clamped to the
// input bounds, so padded (out-of-image) positions never contribute. The
// fused activation clamp is deliberately disabled in this port.
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
                    const float* input_data, const RuntimeShape& output_shape,
                    float* output_data) {
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          // Window origin in input coordinates; may be negative with padding.
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          // NOTE(review): if the clamped window is empty (filter entirely
          // outside the image), lowest() is written out unchanged — assumed
          // not to occur with well-formed padding; confirm.
          float max = std::numeric_limits<float>::lowest();
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              max = std::max(
                  max,
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
            }
          }
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              // DISABLE ActivationFunctionWithMinMax
              //ActivationFunctionWithMinMax(max, params.float_activation_min,
              //                             params.float_activation_max);

              // INSTEAD
              max;
        }
      }
    }
  }
}
583
// from: tensorflow/lite/kernels/internal/reference/pooling.h
// Float average pooling over an NHWC input. The filter window is clamped to
// the input bounds and the average is taken over the clamped (in-image)
// element count only, so padded positions neither contribute nor dilute the
// mean. The fused activation clamp is deliberately disabled in this port.
inline void AveragePool(const PoolParams& params,
                        const RuntimeShape& input_shape,
                        const float* input_data,
                        const RuntimeShape& output_shape, float* output_data) {
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          // Window origin in input coordinates; may be negative with padding.
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          float total = 0.f;
          float filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              total +=
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              filter_count++;
            }
          }
          // NOTE(review): if the clamped window is empty, filter_count is 0
          // and this divides by zero — assumed not to occur with well-formed
          // padding; confirm.
          const float average = total / filter_count;
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              // DISABLE ActivationFunctionWithMinMax
              //ActivationFunctionWithMinMax(average, params.float_activation_min,
              //                             params.float_activation_max);

              // INSTEAD
              average;
        }
      }
    }
  }
}
641
// from: tensorflow/lite/kernels/internal/reference/softmax.h
// Float softmax along the innermost dimension, applied independently to each
// of the `outer_size` rows. `params.beta` scales the logits before
// exponentiation.
inline void Softmax(const SoftmaxParams& params,
                    const RuntimeShape& input_shape, const float* input_data,
                    const RuntimeShape& output_shape, float* output_data) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i) {
    // Find max element value which we'll use to ensure numerical stability
    // taking advantage of the following equality:
    // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
    float max = std::numeric_limits<float>::lowest();
    for (int c = 0; c < depth; ++c) {
      max = std::max(max, input_data[i * depth + c]);
    }

    // Compute sum.
    float sum = 0.f;
    for (int c = 0; c < depth; ++c) {
      sum += std::exp((input_data[i * depth + c] - max) * params.beta);
    }

    // Compute result: each exponential normalized by the row sum.
    for (int c = 0; c < depth; ++c) {
      output_data[i * depth + c] =
          std::exp((input_data[i * depth + c] - max) * params.beta) / sum;
    }
  }
}
674
// from kernels/internal/reference/reference_ops.h
// Bilinear resize of an NHWC tensor's spatial (height/width) dimensions.
// The target size arrives as a 2-element int32 tensor of (extended) shape
// {1, 1, 1, 2} holding {new_height, new_width}. Each output sample is the
// weighted average of the 4 nearest input samples; with align_corners the
// scale is (in - 1)/(out - 1) so the corner samples coincide.
template <typename T>
inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
                           const RuntimeShape& unextended_input_shape,
                           const T* input_data,
                           const RuntimeShape& unextended_output_size_shape,
                           const int32* output_size_data,
                           const RuntimeShape& unextended_output_shape,
                           T* output_data) {
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  // Left-pad all shapes with 1s up to rank 4.
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_size_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
  int32 input_height = input_shape.Dims(1);
  int32 input_width = input_shape.Dims(2);
  int32 depth = MatchingDim(input_shape, 3, output_shape, 3);

  TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1);
  TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1);
  TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1);
  TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2);
  int32 output_height = output_size_data[Offset(output_size_shape, 0, 0, 0, 0)];
  int32 output_width = output_size_data[Offset(output_size_shape, 0, 0, 0, 1)];

  float height_scale = static_cast<float>(input_height) / output_height;
  float width_scale = static_cast<float>(input_width) / output_width;
  if (op_params.align_corners && output_height > 1) {
    height_scale = static_cast<float>(input_height - 1) / (output_height - 1);
  }
  if (op_params.align_corners && output_width > 1) {
    width_scale = static_cast<float>(input_width - 1) / (output_width - 1);
  }

  for (int b = 0; b < batches; ++b) {
    for (int y = 0; y < output_height; ++y) {
      // (y0, y1) bracket the source row; y1 is clamped at the bottom edge.
      float input_y = y * height_scale;
      int32 y0 = static_cast<int32>(std::floor(input_y));
      int32 y1 = std::min(y0 + 1, input_height - 1);
      for (int x = 0; x < output_width; ++x) {
        // (x0, x1) bracket the source column; x1 is clamped at the right edge.
        float input_x = x * width_scale;
        int32 x0 = static_cast<int32>(std::floor(input_x));
        int32 x1 = std::min(x0 + 1, input_width - 1);
        for (int c = 0; c < depth; ++c) {
          // Weighted average of the 4 neighbors; the fractional parts
          // (input_y - y0) and (input_x - x0) are the interpolation weights.
          T interpolation =
              static_cast<T>(input_data[Offset(input_shape, b, y0, x0, c)] *
                                 (1 - (input_y - y0)) * (1 - (input_x - x0)) +
                             input_data[Offset(input_shape, b, y1, x0, c)] *
                                 (input_y - y0) * (1 - (input_x - x0)) +
                             input_data[Offset(input_shape, b, y0, x1, c)] *
                                 (1 - (input_y - y0)) * (input_x - x0) +
                             input_data[Offset(input_shape, b, y1, x1, c)] *
                                 (input_y - y0) * (input_x - x0));
          output_data[Offset(output_shape, b, y, x, c)] = interpolation;
        }
      }
    }
  }
}
740
// from kernels/internal/reference/reference_ops.h
//
// Nearest-neighbor image resize over the H/W axes of an NHWC tensor.
// For each output (y, x) the nearest input row/column is chosen as
// floor(coord * scale), clamped to the input bounds, and the whole
// depth vector is copied with memcpy.
template <typename T>
inline void ResizeNearestNeighbor(
    const tflite::ResizeNearestNeighborParams& op_params,
    const RuntimeShape& unextended_input_shape, const T* input_data,
    const RuntimeShape& output_size_shape, const int32* output_size_data,
    const RuntimeShape& unextended_output_shape, T* output_data) {
  // Align corners = true is not supported.
  TFLITE_DCHECK(!op_params.align_corners);
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);

  // Left-pad both shapes with 1s up to rank 4 so indexing below is uniform.
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
  int32 input_height = input_shape.Dims(1);
  int32 input_width = input_shape.Dims(2);
  int32 depth = MatchingDim(input_shape, 3, output_shape, 3);

  // The Tensorflow version of this op allows resize on the width and height
  // axis only.  output_size_data holds {new_height, new_width}.
  TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
  int32 output_height = output_size_data[0];
  int32 output_width = output_size_data[1];

  // We use float to ensure agreement with the Tensorflow implementation.
  const float height_scale = static_cast<float>(input_height) / output_height;
  const float width_scale = static_cast<float>(input_width) / output_width;

  // Strides (in elements) of the packed NHWC input layout.
  const int col_offset = input_shape.Dims(3);
  const int row_offset = input_shape.Dims(2) * col_offset;
  const int batch_offset = input_shape.Dims(1) * row_offset;

  const T* input_ptr = input_data;
  T* output_ptr = output_data;  // advanced linearly: output is written densely
  for (int b = 0; b < batches; ++b) {
    for (int y = 0; y < output_height; ++y) {
      // Nearest source row, clamped so scale rounding can't index past the end.
      int32 in_y = std::min(static_cast<int32>(std::floor(y * height_scale)),
                            input_height - 1);
      const T* y_input_ptr = input_ptr + in_y * row_offset;
      for (int x = 0; x < output_width; ++x) {
        // Nearest source column, clamped likewise.
        int32 in_x = std::min(static_cast<int32>(std::floor(x * width_scale)),
                              input_width - 1);
        const T* x_input_ptr = y_input_ptr + in_x * col_offset;
        // Copy the full channel vector for this pixel in one shot.
        memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
        output_ptr += depth;
      }
    }
    input_ptr += batch_offset;
  }
}
795
796 // from: tensorflow/lite/kernels/internal/reference/reference_ops.h
LocalResponseNormalization(const tflite::LocalResponseNormalizationParams & op_params,const RuntimeShape & input_shape,const float * input_data,const RuntimeShape & output_shape,float * output_data)797 inline void LocalResponseNormalization(
798 const tflite::LocalResponseNormalizationParams& op_params,
799 const RuntimeShape& input_shape, const float* input_data,
800 const RuntimeShape& output_shape, float* output_data) {
801 const int trailing_dim = input_shape.DimensionsCount() - 1;
802 const int outer_size =
803 MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
804 const int depth =
805 MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
806
807 for (int i = 0; i < outer_size; ++i) {
808 for (int c = 0; c < depth; ++c) {
809 const int begin_input_c = std::max(0, c - op_params.range);
810 const int end_input_c = std::min(depth, c + op_params.range);
811 float accum = 0.f;
812 for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) {
813 const float input_val = input_data[i * depth + input_c];
814 accum += input_val * input_val;
815 }
816 const float multiplier =
817 std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
818 output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
819 }
820 }
821 }
822
823 // from: tensorflow/lite/kernels/internal/reference/reference_ops.h
824 template <typename T>
Mean(const tflite::MeanParams & op_params,const RuntimeShape & unextended_input_shape,const T * input_data,const RuntimeShape & unextended_output_shape,T * output_data)825 inline void Mean(const tflite::MeanParams& op_params,
826 const RuntimeShape& unextended_input_shape,
827 const T* input_data,
828 const RuntimeShape& unextended_output_shape, T* output_data) {
829 //gemmlowp::ScopedProfilingLabel label("Mean4D");
830
831 // Current implementation only supports dimension equals 4 and simultaneous
832 // reduction over width and height.
833 TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
834 TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
835 const RuntimeShape input_shape =
836 RuntimeShape::ExtendedShape(4, unextended_input_shape);
837 const RuntimeShape output_shape =
838 RuntimeShape::ExtendedShape(4, unextended_output_shape);
839
840 const int output_batch = output_shape.Dims(0);
841 const int output_height = output_shape.Dims(1);
842 const int output_width = output_shape.Dims(2);
843 const int output_depth = output_shape.Dims(3);
844
845 const int input_height = input_shape.Dims(1);
846 const int input_width = input_shape.Dims(2);
847
848 TFLITE_DCHECK_EQ(op_params.axis_count, 2);
849 TFLITE_DCHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
850 (op_params.axis[0] == 2 && op_params.axis[1] == 1));
851 TFLITE_DCHECK_EQ(output_height, 1);
852 TFLITE_DCHECK_EQ(output_width, 1);
853
854 for (int out_b = 0; out_b < output_batch; ++out_b) {
855 for (int out_d = 0; out_d < output_depth; ++out_d) {
856 float value = 0;
857 for (int in_h = 0; in_h < input_height; ++in_h) {
858 for (int in_w = 0; in_w < input_width; ++in_w) {
859 value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
860 }
861 }
862 output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
863 value / (input_width * input_height);
864 }
865 }
866 }
867
868 // from tensorflow/lite/kernels/internal/types.h
869
870 // from tensorflow/lite/kernels/internal/reference/pad.h (TF V2)
871
// The Pad kernel in TFLite handles activation tensors of rank 4 at most.
constexpr int PadKernelMaxDimensionCount() {
  return 4;
}
874
// Constant-pads a tensor of up to 4 dimensions with *pad_value_ptr.
// Output positions inside any left/right padding region receive the pad
// value; all other positions consume input elements in order.  The input
// and output pointers are both walked strictly linearly, which is why the
// loops iterate over the *output* extents.
template <typename T, typename P>
inline void PadImpl(const tflite::PadParams& op_params,
                    const RuntimeShape& input_shape, const T* input_data,
                    const P* pad_value_ptr, const RuntimeShape& output_shape,
                    T* output_data) {
  const RuntimeShape ext_input_shape =
      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
  const RuntimeShape ext_output_shape =
      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
  TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
  TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());

  // Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
  // pad them to 4 dims (yes, we are "padding the padding").
  // The supplied padding counts are right-aligned into the 4-entry arrays,
  // with leading entries zeroed, mirroring ExtendedShape's left-padding.
  int left_padding_copy[PadKernelMaxDimensionCount()];
  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
    left_padding_copy[i] = 0;
  }
  for (int i = 0; i < op_params.left_padding_count; ++i) {
    left_padding_copy[i + PadKernelMaxDimensionCount() -
                      op_params.left_padding_count] = op_params.left_padding[i];
  }
  int right_padding_copy[PadKernelMaxDimensionCount()];
  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
    right_padding_copy[i] = 0;
  }
  for (int i = 0; i < op_params.right_padding_count; ++i) {
    right_padding_copy[i + PadKernelMaxDimensionCount() -
                       op_params.right_padding_count] =
        op_params.right_padding[i];
  }

  const int output_batch = ext_output_shape.Dims(0);
  const int output_height = ext_output_shape.Dims(1);
  const int output_width = ext_output_shape.Dims(2);
  const int output_depth = ext_output_shape.Dims(3);

  const int left_b_padding = left_padding_copy[0];
  const int left_h_padding = left_padding_copy[1];
  const int left_w_padding = left_padding_copy[2];
  const int left_d_padding = left_padding_copy[3];

  const int right_b_padding = right_padding_copy[0];
  const int right_h_padding = right_padding_copy[1];
  const int right_w_padding = right_padding_copy[2];
  const int right_d_padding = right_padding_copy[3];

  // Note: P may differ from T; the value is converted once here.
  const T pad_value = *pad_value_ptr;

  const T* in_ptr = input_data;
  T* out_ptr = output_data;
  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_h = 0; out_h < output_height; ++out_h) {
      for (int out_w = 0; out_w < output_width; ++out_w) {
        for (int out_d = 0; out_d < output_depth; ++out_d) {
          // A position is padding if it falls in the left or right margin of
          // any axis; only non-padding positions advance the input pointer.
          if (out_b < left_b_padding ||
              out_b >= output_batch - right_b_padding ||
              out_h < left_h_padding ||
              out_h >= output_height - right_h_padding ||
              out_w < left_w_padding ||
              out_w >= output_width - right_w_padding ||
              out_d < left_d_padding ||
              out_d >= output_depth - right_d_padding) {
            *out_ptr++ = pad_value;
          } else {
            *out_ptr++ = *in_ptr++;
          }
        }
      }
    }
  }
}
947
948 template <typename T, typename P>
Pad(const tflite::PadParams & op_params,const RuntimeShape & input_shape,const T * input_data,const P * pad_value_ptr,const RuntimeShape & output_shape,T * output_data)949 inline void Pad(const tflite::PadParams& op_params,
950 const RuntimeShape& input_shape, const T* input_data,
951 const P* pad_value_ptr, const RuntimeShape& output_shape,
952 T* output_data) {
953 PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
954 output_data);
955 }
956
957 template <typename T>
Pad(const tflite::PadParams & op_params,const RuntimeShape & input_shape,const T * input_data,const int32 * pad_value_ptr,const RuntimeShape & output_shape,T * output_data)958 inline void Pad(const tflite::PadParams& op_params,
959 const RuntimeShape& input_shape, const T* input_data,
960 const int32* pad_value_ptr, const RuntimeShape& output_shape,
961 T* output_data) {
962 const T converted_pad_value = static_cast<T>(*pad_value_ptr);
963 PadImpl(op_params, input_shape, input_data, &converted_pad_value,
964 output_shape, output_data);
965 }
966
// Specialization for int32 tensors: the pad value already has the element
// type, so it is forwarded as-is rather than going through the converting
// overload above.
template <>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const int32* input_data,
                const int32* pad_value_ptr, const RuntimeShape& output_shape,
                int32* output_data) {
  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
          output_data);
}
975
// Generic image-style pad is intentionally a stub: upstream TFLite asserts
// here because image-style padding is only defined for the uint8/int8/float
// overloads below.  NOTE(review): with the assert commented out, reaching
// this overload leaves output_data untouched — confirm callers can never
// select this generic case.
template <typename T, typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape, const T* input_data,
                          const P* pad_value_ptr,
                          const RuntimeShape& output_shape, T* output_data) {
  //TFLITE_ASSERT_FALSE;
}
983
984 template <typename P>
PadImageStyle(const tflite::PadParams & op_params,const RuntimeShape & input_shape,const uint8 * input_data,const P * pad_value_ptr,const RuntimeShape & output_shape,uint8 * output_data)985 inline void PadImageStyle(const tflite::PadParams& op_params,
986 const RuntimeShape& input_shape,
987 const uint8* input_data, const P* pad_value_ptr,
988 const RuntimeShape& output_shape,
989 uint8* output_data) {
990 Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
991 output_data);
992 }
993
994 template <typename P>
PadImageStyle(const tflite::PadParams & op_params,const RuntimeShape & input_shape,const int8_t * input_data,const P * pad_value_ptr,const RuntimeShape & output_shape,int8_t * output_data)995 inline void PadImageStyle(const tflite::PadParams& op_params,
996 const RuntimeShape& input_shape,
997 const int8_t* input_data, const P* pad_value_ptr,
998 const RuntimeShape& output_shape,
999 int8_t* output_data) {
1000 Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
1001 output_data);
1002 }
1003
1004 template <typename P>
PadImageStyle(const tflite::PadParams & op_params,const RuntimeShape & input_shape,const float * input_data,const P * pad_value_ptr,const RuntimeShape & output_shape,float * output_data)1005 inline void PadImageStyle(const tflite::PadParams& op_params,
1006 const RuntimeShape& input_shape,
1007 const float* input_data, const P* pad_value_ptr,
1008 const RuntimeShape& output_shape,
1009 float* output_data) {
1010 Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
1011 output_data);
1012 }
1013
1014 }
1015
1016 //
1017 // exporting this functionality by wrapping it in our API
1018 //
1019
1020 #include "../../tensor.h"
1021
1022 namespace NnOperators {
1023
Conv2D(const TensorShape & inputShape,const float * inputData,const TensorShape & filterShape,const float * filterData,const TensorShape & biasShape,const float * biasData,const TensorShape & outputShape,float * outputData,unsigned paddingWidth,unsigned paddingHeight,unsigned strideWidth,unsigned strideHeight,unsigned dilationWidthFactor,unsigned dilationHeightFactor)1024 void Conv2D(
1025 const TensorShape &inputShape, const float *inputData,
1026 const TensorShape &filterShape, const float *filterData,
1027 const TensorShape &biasShape, const float *biasData,
1028 const TensorShape &outputShape, float *outputData,
1029 unsigned paddingWidth, unsigned paddingHeight,
1030 unsigned strideWidth, unsigned strideHeight,
1031 unsigned dilationWidthFactor, unsigned dilationHeightFactor
1032 ) {
1033 tflite::ConvParams params;
1034 params.padding_values.width = paddingWidth;
1035 params.padding_values.height = paddingHeight;
1036 params.stride_width = strideWidth;
1037 params.stride_height = strideHeight;
1038 params.dilation_width_factor = dilationWidthFactor;
1039 params.dilation_height_factor = dilationHeightFactor;
1040
1041 tflite::Conv(params,
1042 tflite::RuntimeShape(inputShape), inputData,
1043 tflite::RuntimeShape(filterShape), filterData,
1044 tflite::RuntimeShape(biasShape), biasData,
1045 tflite::RuntimeShape(outputShape), outputData,
1046 tflite::RuntimeShape(0),
1047 nullptr
1048 );
1049 }
1050
DepthwiseConv2D(const TensorShape & inputShape,const float * inputData,const TensorShape & filterShape,const float * filterData,const TensorShape & biasShape,const float * biasData,const TensorShape & outputShape,float * outputData,unsigned paddingWidth,unsigned paddingHeight,unsigned strideWidth,unsigned strideHeight,unsigned dilationWidthFactor,unsigned dilationHeightFactor,unsigned depthMultiplier)1051 void DepthwiseConv2D(
1052 const TensorShape &inputShape, const float *inputData,
1053 const TensorShape &filterShape, const float *filterData,
1054 const TensorShape &biasShape, const float *biasData,
1055 const TensorShape &outputShape, float *outputData,
1056 unsigned paddingWidth, unsigned paddingHeight,
1057 unsigned strideWidth, unsigned strideHeight,
1058 unsigned dilationWidthFactor, unsigned dilationHeightFactor,
1059 unsigned depthMultiplier
1060 ) {
1061 tflite::DepthwiseParams params;
1062 params.padding_values.width = paddingWidth;
1063 params.padding_values.height = paddingHeight;
1064 params.stride_width = strideWidth;
1065 params.stride_height = strideHeight;
1066 params.dilation_width_factor = dilationWidthFactor;
1067 params.dilation_height_factor = dilationHeightFactor;
1068 params.depth_multiplier = depthMultiplier;
1069
1070 tflite::DepthwiseConv(params,
1071 tflite::RuntimeShape(inputShape), inputData,
1072 tflite::RuntimeShape(filterShape), filterData,
1073 tflite::RuntimeShape(biasShape), biasData,
1074 tflite::RuntimeShape(outputShape), outputData
1075 );
1076 }
1077
FullyConnected(const TensorShape & inputShape,const float * inputData,const TensorShape & filterShape,const float * filterData,const TensorShape & biasShape,const float * biasData,const TensorShape & outputShape,float * outputData)1078 void FullyConnected(
1079 const TensorShape &inputShape, const float *inputData,
1080 const TensorShape &filterShape, const float *filterData,
1081 const TensorShape &biasShape, const float *biasData,
1082 const TensorShape &outputShape, float *outputData
1083 ) {
1084 tflite::FullyConnectedParams params;
1085
1086 tflite::FullyConnected(params,
1087 tflite::RuntimeShape(inputShape), inputData,
1088 tflite::RuntimeShape(filterShape), filterData,
1089 tflite::RuntimeShape(biasShape), biasData,
1090 tflite::RuntimeShape(outputShape), outputData
1091 );
1092 }
1093
MaxPool(const TensorShape & inputShape,const float * inputData,const TensorShape & outputShape,float * outputData,unsigned paddingWidth,unsigned paddingHeight,unsigned strideWidth,unsigned strideHeight,unsigned filterWidth,unsigned filterHeight)1094 void MaxPool(
1095 const TensorShape &inputShape, const float *inputData,
1096 const TensorShape &outputShape, float *outputData,
1097 unsigned paddingWidth, unsigned paddingHeight,
1098 unsigned strideWidth, unsigned strideHeight,
1099 unsigned filterWidth, unsigned filterHeight
1100 ) {
1101 tflite::PoolParams params;
1102 params.padding_values.width = paddingWidth;
1103 params.padding_values.height = paddingHeight;
1104 params.stride_width = strideWidth;
1105 params.stride_height = strideHeight;
1106 params.filter_width = filterWidth;
1107 params.filter_height = filterHeight;
1108
1109 tflite::MaxPool(params,
1110 tflite::RuntimeShape(inputShape), inputData,
1111 tflite::RuntimeShape(outputShape), outputData
1112 );
1113 }
1114
AveragePool(const TensorShape & inputShape,const float * inputData,const TensorShape & outputShape,float * outputData,unsigned paddingWidth,unsigned paddingHeight,unsigned strideWidth,unsigned strideHeight,unsigned filterWidth,unsigned filterHeight)1115 void AveragePool(
1116 const TensorShape &inputShape, const float *inputData,
1117 const TensorShape &outputShape, float *outputData,
1118 unsigned paddingWidth, unsigned paddingHeight,
1119 unsigned strideWidth, unsigned strideHeight,
1120 unsigned filterWidth, unsigned filterHeight
1121 ) {
1122 tflite::PoolParams params;
1123 params.padding_values.width = paddingWidth;
1124 params.padding_values.height = paddingHeight;
1125 params.stride_width = strideWidth;
1126 params.stride_height = strideHeight;
1127 params.filter_width = filterWidth;
1128 params.filter_height = filterHeight;
1129
1130 tflite::AveragePool(params,
1131 tflite::RuntimeShape(inputShape), inputData,
1132 tflite::RuntimeShape(outputShape), outputData
1133 );
1134 }
1135
Softmax(const TensorShape & inputShape,const float * inputData,const TensorShape & outputShape,float * outputData,float beta)1136 void Softmax(
1137 const TensorShape &inputShape, const float *inputData,
1138 const TensorShape &outputShape, float *outputData,
1139 float beta
1140 ) {
1141 tflite::SoftmaxParams params;
1142 params.beta = beta;
1143
1144 tflite::Softmax(params,
1145 tflite::RuntimeShape(inputShape), inputData,
1146 tflite::RuntimeShape(outputShape), outputData
1147 );
1148 }
1149
ResizeBilinear(const TensorShape & inputShape,const float * inputData,const TensorShape & outputShape,float * outputData,bool alignCorners)1150 void ResizeBilinear(
1151 const TensorShape &inputShape, const float *inputData,
1152 const TensorShape &outputShape, float *outputData,
1153 bool alignCorners
1154 ) {
1155 tflite::ResizeBilinearParams params;
1156 params.align_corners = alignCorners;
1157
1158 // not sure why the operation was defiened to need these
1159 tflite::RuntimeShape outputSizeDims = {1, 1, 1, 2};
1160 tflite::int32 outputSizeData[2] = {(tflite::int32)outputShape[1/*height*/], (tflite::int32)outputShape[2/*width*/]};
1161
1162 tflite::ResizeBilinear(params,
1163 tflite::RuntimeShape(inputShape), inputData,
1164 outputSizeDims, outputSizeData,
1165 tflite::RuntimeShape(outputShape), outputData
1166 );
1167 }
1168
ResizeNearestNeighbor(const TensorShape & inputShape,const float * inputData,const TensorShape & outputShape,float * outputData,bool alignCorners)1169 void ResizeNearestNeighbor(
1170 const TensorShape &inputShape, const float *inputData,
1171 const TensorShape &outputShape, float *outputData,
1172 bool alignCorners
1173 ) {
1174 tflite::ResizeNearestNeighborParams params;
1175 params.align_corners = alignCorners;
1176
1177 // not sure why the operation was defiened to need these
1178 tflite::RuntimeShape outputSizeDims = {1, 1, 1, 2};
1179 tflite::int32 outputSizeData[2] = {(tflite::int32)outputShape[1/*height*/], (tflite::int32)outputShape[2/*width*/]};
1180
1181 tflite::ResizeNearestNeighbor(params,
1182 tflite::RuntimeShape(inputShape), inputData,
1183 outputSizeDims, outputSizeData,
1184 tflite::RuntimeShape(outputShape), outputData
1185 );
1186 }
1187
LocalResponseNormalization(const TensorShape & inputShape,const float * inputData,const TensorShape & outputShape,float * outputData,int radius,float alpha,float beta,float bias)1188 void LocalResponseNormalization(
1189 const TensorShape &inputShape, const float *inputData,
1190 const TensorShape &outputShape, float *outputData,
1191 int radius, float alpha, float beta, float bias
1192 ) {
1193 tflite::LocalResponseNormalizationParams params;
1194 params.range = radius; // XXX in TF Lite sources the operator option is called "radius" but the parameter in the structure is called "range"
1195 params.alpha = alpha;
1196 params.beta = beta;
1197 params.bias = bias;
1198
1199 tflite::LocalResponseNormalization(params,
1200 tflite::RuntimeShape(inputShape), inputData,
1201 tflite::RuntimeShape(outputShape), outputData
1202 );
1203 }
1204
Mean(const TensorShape & inputShape,const float * inputData,const TensorShape & outputShape,float * outputData,const int32_t * axis,unsigned axis_count)1205 void Mean(
1206 const TensorShape &inputShape, const float *inputData,
1207 const TensorShape &outputShape, float *outputData,
1208 const int32_t *axis, unsigned axis_count
1209 ) {
1210 tflite::MeanParams params;
1211 params.axis_count = axis_count;
1212 std::copy(axis, axis+axis_count, params.axis);
1213
1214 tflite::Mean<float>(params,
1215 tflite::RuntimeShape(inputShape), inputData,
1216 tflite::RuntimeShape(outputShape), outputData
1217 );
1218 }
1219
Pad(const std::array<int32_t,2> * paddings,const TensorShape & inputShape,const float * inputData,const TensorShape & outputShape,float * outputData)1220 void Pad(
1221 const std::array<int32_t,2>* paddings,
1222 const TensorShape &inputShape, const float *inputData,
1223 const TensorShape &outputShape, float *outputData
1224 ) {
1225 tflite::PadParams params;
1226 params.left_padding_count = inputShape.size();
1227 params.right_padding_count = params.left_padding_count;
1228 for (unsigned i = 0; i < params.left_padding_count; i++, paddings++) {
1229 params.left_padding[i] = (*paddings)[0];
1230 params.right_padding[i] = (*paddings)[1];
1231 }
1232
1233 float padValue = 0;
1234
1235 tflite::Pad(params,
1236 tflite::RuntimeShape(inputShape), inputData,
1237 &padValue,
1238 tflite::RuntimeShape(outputShape), outputData
1239 );
1240 }
1241
1242 } // NnOperators
1243