//
//  GeometryComputerUtils.cpp
//  MNN
//
//  Created by MNN on 2020/05/11.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "GeometryComputerUtils.hpp"
#include "core/OpCommonUtils.hpp"
#include "core/RuntimeFactory.hpp"
#include "shape/SizeComputer.hpp"
#include "core/AutoStorage.h"

#ifdef MNN_BUILD_CODEGEN
#include "OpFuse.hpp"
#endif
namespace MNN {
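// Returns true if any output tensor of the pipeline unit has a dimension of length <= 0,
// i.e. an empty shape that needs no computation or allocation.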
static bool _hasZeroShapeOutput(const Schedule::PipelineInfo& info) {
    for (auto t : info.outputs) {
        for (int v = 0; v < t->dimensions(); ++v) {
            if (t->length(v) <= 0) {
                return true;
            }
        }
    }
    return false;
}
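// Serialize a Pooling op into `builder` and return its offset. The countType field is
// only written when it differs from AvgPoolCountType_DEFAULT.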
flatbuffers::Offset<Op> GeometryComputerUtils::makePool(flatbuffers::FlatBufferBuilder& builder, std::pair<int, int> kernel, std::pair<int, int> stride, PoolType type, MNN::PoolPadType pad, std::pair<int, int> pads, bool isglobal, AvgPoolCountType countType) {
    PoolBuilder poolB(builder);
    poolB.add_type(type);
    poolB.add_padType(pad);
    poolB.add_padX(pads.first);
    poolB.add_padY(pads.second);
    poolB.add_kernelX(kernel.first);
    poolB.add_kernelY(kernel.second);
    poolB.add_strideX(stride.first);
    poolB.add_strideY(stride.second);
    poolB.add_isGlobal(isglobal);
    if (AvgPoolCountType_DEFAULT != countType) {
        poolB.add_countType(countType);
    }
    auto poolOffset = poolB.Finish();
    OpBuilder opB(builder);
    opB.add_type(OpType_Pooling);
    opB.add_main(poolOffset.Union());
    opB.add_main_type(OpParameter_Pool);
    return opB.Finish();
}

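// Mark and pre-compute constant tensors before scheduling:
// 1. Evaluate OpType_Const ops, either aliasing the user-held net buffer or copying the
//    data into memory owned by backupBackend (these tensors are collected in constTensors).
// 2. Mark ops whose content-needing inputs are all constant as Schedule::CONSTANT.
// 3. For ops whose shape computation needs input content, force those inputs to be constant
//    and propagate the change until a fixed point; the resulting intermediate constant
//    outputs are collected in midConstTensors.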
void GeometryComputerUtils::buildConstantTensors(std::vector<Schedule::PipelineInfo>& infos,
                                                 std::shared_ptr<Backend> backupBackend, bool netBufferHold,
                                                 std::vector<Tensor*>& constTensors,
                                                 std::vector<Tensor*>& midConstTensors) {
    // Create Const Tensors
    for (auto& info : infos) {
        if (info.op->type() != OpType_Const) {
            continue;
        }
        SizeComputer::computeOutputSize(info.op, info.inputs, info.outputs);
        for (auto t : info.outputs) {
            TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::CONSTANT;
        }
        info.type                                        = Schedule::CONSTANT;
        TensorUtils::getDescribe(info.outputs[0])->usage = Tensor::InsideDescribe::CONSTANT;
        TensorUtils::setLinearLayout(info.outputs[0]);
        if (_hasZeroShapeOutput(info)) {
            continue;
        }
        auto parameter                                     = info.op->main_as_Blob();
        TensorUtils::getDescribe(info.outputs[0])->backend = backupBackend.get();
        if (netBufferHold && (parameter->dataType() != DataType_DT_HALF)) {
            // The net buffer is held by the user, so we can use it directly
            info.outputs[0]->buffer().host = (uint8_t*)OpCommonUtils::blobData(info.op);
        } else {
            // The net buffer may be released later, or we can't use it directly (half data must be cast to float)
            auto res = backupBackend->onAcquireBuffer(info.outputs[0], Backend::STATIC);
            if (!res) {
                MNN_ERROR("Error for alloc const in pipeline\n");
                return;
            }
            TensorUtils::getDescribe(info.outputs[0])->backend = backupBackend.get();
            AutoRelease<Execution> exe(backupBackend->onCreate(info.inputs, info.outputs, info.op));
            exe->onResize(info.inputs, info.outputs);
            exe->onExecute(info.inputs, info.outputs);
            constTensors.emplace_back(info.outputs[0]);
        }
    }
    // Check Middle Const
    for (auto& info : infos) {
        if (info.op->type() == OpType_Const) {
            continue;
        }
        bool isConst = true;
        for (int i = 0; i < info.inputs.size(); ++i) {
            if (TensorUtils::getDescribe(info.inputs[i])->usage == Tensor::InsideDescribe::CONSTANT) {
                continue;
            }
            if (OpCommonUtils::opNeedContent(info.op->type(), i)) {
                isConst = false;
                break;
            }
        }
        if (isConst) {
            for (auto t : info.outputs) {
                TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::CONSTANT;
            }
            info.type = Schedule::CONSTANT;
        }
    }
    // Check ops that force their inputs' content to be computed for shape inference
    bool hasSizeComputeOp = false;
    for (auto& info : infos) {
        if (info.op->type() == OpType_Const) {
            continue;
        }
        if (info.op->type() == OpType_Where && (!netBufferHold)) {
            // For compatibility with old models
            continue;
        }
        auto dims = SizeComputer::needInputContent(info.op, info.inputs.size());
        for (auto index : dims) {
            if (index < info.inputs.size()) {
                if (TensorUtils::getDescribe(info.inputs[index])->usage != Tensor::InsideDescribe::CONSTANT) {
                    hasSizeComputeOp                                    = true;
                    TensorUtils::getDescribe(info.inputs[index])->usage = Tensor::InsideDescribe::CONSTANT;
                }
            }
        }
    }
    if (hasSizeComputeOp) {
        bool hasConst = true;
        while (hasConst) {
            hasConst = false;
            for (auto& info : infos) {
                if (info.type == Schedule::CONSTANT) {
                    continue;
                }
                bool turnConst = false;
                for (auto t : info.outputs) {
                    if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::CONSTANT) {
                        turnConst = true;
                        break;
                    }
                }
                if (turnConst) {
                    for (auto t : info.outputs) {
                        TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::CONSTANT;
                    }
                    for (auto t : info.inputs) {
                        TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::CONSTANT;
                    }
                    info.type = Schedule::CONSTANT;
                    hasConst  = true;
                }
            }
        }
    }
    for (auto& info : infos) {
        if (info.op->type() == OpType_Const) {
            continue;
        }
        if (info.type == Schedule::CONSTANT) {
            for (auto t : info.outputs) {
                TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::CONSTANT;
                midConstTensors.emplace_back(t);
            }
        }
    }
}

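// Run shape inference for every op, fold Schedule::CONSTANT subgraphs by executing them
// on backupBackend, then lower the remaining ops through their GeometryComputer
// implementations and collect the raster-ready commands in `buffer`.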
ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
    std::vector<Schedule::PipelineInfo>& infos,
    CommandBuffer& buffer,
    GeometryComputer::Context& geoContext,
    std::shared_ptr<Backend> backupBackend,
    Runtime::CompilerType compileType) {
    /** Size Compute and compute Const Begin */
    GeometryComputer::Context ctx(backupBackend, false);
    // Size Compute and compute Const
    for (auto& info : infos) {
        if (info.op->type() == OpType_Const) {
            continue;
        }
        auto res = SizeComputer::computeOutputSize(info.op, info.inputs, info.outputs);
        if (!res) {
            MNN_ERROR("Compute Shape Error for %s\n", info.op->name()->c_str());
            return COMPUTE_SIZE_ERROR;
        }
        // FIXME: Find a better way to keep compatibility with old models
        /**
         For convolution on 2D / 3D tensors (dense / 1D convolution),
         old code accesses dim[2] / dim[3] to get width and height.
         Set those lengths to 1 for compatibility.
         */
        for (auto t : info.outputs) {
            TensorUtils::adjustTensorForCompability(t);
        }
        if (info.type == Schedule::CONSTANT) {
            if (_hasZeroShapeOutput(info)) {
                continue;
            }
            ctx.clear();
            CommandBuffer tempSrcBuffer;
            CommandBuffer tempDstBuffer;
            auto geo = GeometryComputer::search(info.op->type(), Runtime::Compiler_Loop);
            {
                res = geo->compute(info.op, info.inputs, info.outputs, ctx, tempSrcBuffer);
                if (!res) {
                    MNN_ERROR("Const Folder Error in geometry for %s\n", info.op->name()->c_str());
                    return NOT_SUPPORT;
                }
            }
            GeometryComputerUtils::makeRaster(tempSrcBuffer, tempDstBuffer, ctx);
            for (auto& c : tempDstBuffer.command) {
                AutoRelease<Execution> exe(backupBackend->onCreate(c.inputs, c.outputs, c.op));
                if (nullptr == exe.get()) {
                    MNN_ERROR("Const Folder Error for %s\n", info.op->name()->c_str());
                    return NO_EXECUTION;
                }
                for (auto t : c.outputs) {
                    auto des = TensorUtils::getDescribe(t);
                    if (des->backend == nullptr) {
                        TensorUtils::setLinearLayout(t);
                        res = backupBackend->onAcquireBuffer(t, Backend::STATIC);
                        if (!res) {
                            return OUT_OF_MEMORY;
                        }
                        des->backend = backupBackend.get();
                    }
                }
                auto code = exe->onResize(c.inputs, c.outputs);
                if (NO_ERROR != code) {
                    return NOT_SUPPORT;
                }
                code = exe->onExecute(c.inputs, c.outputs);
                if (NO_ERROR != code) {
                    return NOT_SUPPORT;
                }
            }
            for (auto& c : tempDstBuffer.command) {
                for (auto t : c.outputs) {
                    if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::NORMAL) {
                        backupBackend->onReleaseBuffer(t, Backend::STATIC);
                    }
                }
            }
        }
    }
    /** Size Compute and compute Const End */

    /** Geometry Transform */
    CommandBuffer tmpBuffer;
    for (auto& info : infos) {
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        if (_hasZeroShapeOutput(info)) {
            continue;
        }
        auto geo = GeometryComputer::search(info.op->type(), compileType);
        {
            bool res = geo->compute(info.op, info.inputs, info.outputs, geoContext, tmpBuffer);
            if (!res) {
                return NOT_SUPPORT;
            }
        }
    }
    GeometryComputerUtils::makeRaster(tmpBuffer, buffer, geoContext);
#ifdef MNN_BUILD_CODEGEN
    // Fuse ops and run codegen
    {
        opFuse(buffer);
    }
#endif
    return NO_ERROR;
}

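// Copy the commands in srcBuffer to dstBuffer, first inserting the Raster commands required
// to materialize any MEMORY_VIRTUAL inputs, then the pending outputs recorded in ctx.pOutputs.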
void GeometryComputerUtils::makeRaster(const CommandBuffer& srcBuffer, CommandBuffer& dstBuffer,
                                       GeometryComputer::Context& ctx) {
    dstBuffer.extras = std::move(srcBuffer.extras);
    for (int index = 0; index < srcBuffer.command.size(); ++index) {
        auto& iter = srcBuffer.command[index];
        const Op* op = iter.op;
        auto cmd     = iter;
        if (!iter.buffer.empty()) {
            op = flatbuffers::GetRoot<Op>((void*)iter.buffer.data());
        }
        auto type = op->type();
        MNN_ASSERT(OpType_Raster != type);
        for (int i = 0; i < iter.inputs.size(); ++i) {
            if (!OpCommonUtils::opNeedContent(type, i)) {
                continue;
            }
            auto des = TensorUtils::getDescribe(cmd.inputs[i]);
            MNN_ASSERT(des->tensorArrayAttr == nullptr);
            if (des->memoryType == Tensor::InsideDescribe::MEMORY_VIRTUAL) {
                ctx.getRasterCacheCreateRecurrse(cmd.inputs[i], dstBuffer);
            }
        }
        dstBuffer.command.emplace_back(std::move(cmd));
    }
    for (auto& o : ctx.pOutputs) {
        ctx.getRasterCacheCreateRecurrse(o, dstBuffer);
    }
}
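// Build a self-contained BinaryOp command; the serialized op lives in cmd.buffer and
// cmd.op points into that buffer.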
Command GeometryComputerUtils::makeBinary(int type, Tensor* input0, Tensor* input1, Tensor* output) {
    flatbuffers::FlatBufferBuilder builder;
    BinaryOpBuilder builder_(builder);
    builder_.add_opType(type);
    auto mainOffset = builder_.Finish().Union();
    OpBuilder opB(builder);
    opB.add_type(OpType_BinaryOp);
    opB.add_main(mainOffset);
    opB.add_main_type(OpParameter_BinaryOp);
    builder.Finish(opB.Finish());
    Command cmd;
    cmd.buffer.resize(builder.GetSize());
    ::memcpy(cmd.buffer.data(), builder.GetBufferPointer(), cmd.buffer.size());
    cmd.inputs  = {input0, input1};
    cmd.outputs = {output};
    cmd.op      = flatbuffers::GetMutableRoot<Op>(cmd.buffer.data());
    return cmd;
}

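// Build a Reduction command that reduces along axis 1 with keepDims = true.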
Command GeometryComputerUtils::makeReduce(ReductionType type, Tensor* input0, Tensor* output) {
    flatbuffers::FlatBufferBuilder builder;
    auto vec = builder.CreateVector(std::vector<int>{1});
    ReductionParamBuilder builder_(builder);
    builder_.add_operation(type);
    builder_.add_keepDims(true);
    builder_.add_dim(vec);
    auto mainOffset = builder_.Finish().Union();
    OpBuilder opB(builder);
    opB.add_type(OpType_Reduction);
    opB.add_main(mainOffset);
    opB.add_main_type(OpParameter_ReductionParam);
    builder.Finish(opB.Finish());
    Command cmd;
    cmd.buffer.resize(builder.GetSize());
    ::memcpy(cmd.buffer.data(), builder.GetBufferPointer(), cmd.buffer.size());
    cmd.inputs  = {input0};
    cmd.outputs = {output};
    cmd.op      = flatbuffers::GetMutableRoot<Op>(cmd.buffer.data());
    return cmd;
}
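// Build a UnaryOp command for the given unary operation.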
Command GeometryComputerUtils::makeUnary(UnaryOpOperation type, Tensor* input0, Tensor* output) {
    flatbuffers::FlatBufferBuilder builder;
    UnaryOpBuilder builder_(builder);
    builder_.add_opType(type);
    auto mainOffset = builder_.Finish().Union();
    OpBuilder opB(builder);
    opB.add_type(OpType_UnaryOp);
    opB.add_main(mainOffset);
    opB.add_main_type(OpParameter_UnaryOp);
    builder.Finish(opB.Finish());
    Command cmd;
    cmd.buffer.resize(builder.GetSize());
    ::memcpy(cmd.buffer.data(), builder.GetBufferPointer(), cmd.buffer.size());
    cmd.inputs  = {input0};
    cmd.outputs = {output};
    cmd.op      = flatbuffers::GetMutableRoot<Op>(cmd.buffer.data());
    return cmd;
}
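// Wrap an op that has already been finished in `builder` into a command with the given
// inputs and outputs.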
Command GeometryComputerUtils::makeCommand(flatbuffers::FlatBufferBuilder& builder, const std::vector<Tensor*>& inputs,
                                           const std::vector<Tensor*>& outputs) {
    Command cmd;
    cmd.buffer.resize(builder.GetSize());
    ::memcpy(cmd.buffer.data(), builder.GetBufferPointer(), cmd.buffer.size());
    cmd.outputs = outputs;
    cmd.inputs  = inputs;
    cmd.op      = flatbuffers::GetMutableRoot<Op>(cmd.buffer.data());
    return cmd;
}

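// Build a MatMul command; Bias is appended as a third input when it is non-null.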
Command GeometryComputerUtils::makeMatMul(Tensor* input0, Tensor* input1, Tensor* output, Tensor* Bias, bool transposeA,
                                          bool transposeB) {
    flatbuffers::FlatBufferBuilder builder;
    MatMulBuilder builder_(builder);
    builder_.add_transposeA(transposeA);
    builder_.add_transposeB(transposeB);
    auto mainOffset = builder_.Finish().Union();
    OpBuilder opB(builder);
    opB.add_type(OpType_MatMul);
    opB.add_main(mainOffset);
    opB.add_main_type(OpParameter_MatMul);
    builder.Finish(opB.Finish());
    Command cmd;
    cmd.buffer.resize(builder.GetSize());
    ::memcpy(cmd.buffer.data(), builder.GetBufferPointer(), cmd.buffer.size());
    if (nullptr == Bias) {
        cmd.inputs = {input0, input1};
    } else {
        cmd.inputs = {input0, input1, Bias};
    }
    cmd.outputs = {output};
    cmd.op      = flatbuffers::GetMutableRoot<Op>(cmd.buffer.data());
    return cmd;
}

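// Build a region that maps `size` contiguous elements of `src`, starting at srcOffset,
// to dstOffset in the destination.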
Tensor::InsideDescribe::Region GeometryComputerUtils::makeRawAddressRef(Tensor* src, int srcOffset, int size,
                                                                        int dstOffset) {
    Tensor::InsideDescribe::Region reg;
    // reg.size defaults to {1, 1, 1}
    reg.size[2] = size;

    // reg.src / reg.dst default to offset 0 and strides {1, 1, 1}
    reg.src.offset = srcOffset;
    reg.dst.offset = dstOffset;
    reg.origin     = src;
    return reg;
}

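// Make `dst` a virtual tensor whose content is a single contiguous region of `src`.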
void GeometryComputerUtils::makeRawAddressRef(Tensor* dst, Tensor* src, int srcOffset, int size, int dstOffset) {
    auto describe        = TensorUtils::getDescribe(dst);
    describe->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
    describe->regions    = {makeRawAddressRef(src, srcOffset, size, dstOffset)};
}

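// Make `dst` a virtual tensor referencing a 3-D slice of `src`: `originSize` gives the
// source extents, `offset` the starting coordinates, and `dstSize` the slice extents.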
void GeometryComputerUtils::makeSliceRef(Tensor* dst, Tensor* src, const std::vector<int>& originSize,
                                         const std::vector<int>& offset, const std::vector<int>& dstSize) {
    auto describe        = TensorUtils::getDescribe(dst);
    describe->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
    Tensor::InsideDescribe::Region reg;
    reg.origin  = src;
    reg.size[0] = dstSize[0];
    reg.size[1] = dstSize[1];
    reg.size[2] = dstSize[2];

    reg.src.offset    = offset[0] * originSize[1] * originSize[2] + offset[1] * originSize[2] + offset[2];
    reg.src.stride[0] = originSize[1] * originSize[2];
    reg.src.stride[1] = originSize[2];
    reg.src.stride[2] = 1;

    reg.dst.offset    = 0;
    reg.dst.stride[0] = dstSize[1] * dstSize[2];
    reg.dst.stride[1] = dstSize[2];
    reg.dst.stride[2] = 1;
    describe->regions = {reg};
}
} // namespace MNN