/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "op_halide.hpp"
#include "op_inf_engine.hpp"
#include "ie_ngraph.hpp"
#include "op_vkcom.hpp"
#include "op_cuda.hpp"

#ifdef HAVE_CUDA
#include "cuda4dnn/init.hpp"
#include "cuda4dnn/primitives/eltwise.hpp" // required by fuseLayers
#endif

#include "halide_scheduler.hpp"

#include <set>
#include <algorithm>
#include <iostream>
#include <sstream>
#include <fstream>
#include <iterator>
#include <numeric>
#include <memory>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn/layer_reg.private.hpp>

#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN

static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0);

// This option is useful for running valgrind memory error detection.
static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);

#ifdef HAVE_OPENCL
static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
#endif

static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
#ifdef HAVE_INF_ENGINE
    (size_t)DNN_BACKEND_INFERENCE_ENGINE
#else
    (size_t)DNN_BACKEND_OPENCV
#endif
);

// Additional checks (slow down execution!)
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
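// Note: the parameters above are read once, at static initialization, from environment
// variables of the same name. For example (illustrative), launching a process with
// OPENCV_DNN_CHECK_NAN_INF=1 enables the NaN/Inf checks for that run.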

bool DNN_DIAGNOSTICS_RUN = false;

void enableModelDiagnostics(bool isDiagnosticsMode)
{
    DNN_DIAGNOSTICS_RUN = isDiagnosticsMode;
}

using std::vector;
using std::map;
using std::make_pair;
using std::set;
using std::string;

//==================================================================================================

class BackendRegistry
{
public:
    typedef std::vector< std::pair<Backend, Target> > BackendsList;
    const BackendsList & getBackends() const { return backends; }
    static BackendRegistry & getRegistry()
    {
        static BackendRegistry impl;
        return impl;
    }

#ifdef HAVE_INF_ENGINE
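    // Detection strategy: with Inference Engine 2019R3 and newer, the available devices
    // are queried directly from the IE Core (lightweight); with older releases a tiny
    // one-layer convolution network is built and executed on the requested target to
    // probe whether that target is actually usable.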
    static inline bool checkIETarget(Target target)
    {
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3)
        // Lightweight detection
        const std::vector<std::string> devices = getCore("").GetAvailableDevices();
        for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
        {
            if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD)
                return true;
            if (std::string::npos != i->find("HDDL") && target == DNN_TARGET_HDDL)
                return true;
            else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA)
                return true;
            else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU)
                return true;
            else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
                return true;
        }
        return false;
#else
        cv::dnn::Net net;
        cv::dnn::LayerParams lp;
        lp.set("kernel_size", 1);
        lp.set("num_output", 1);
        lp.set("bias_term", false);
        lp.type = "Convolution";
        lp.name = "testLayer";
        lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
        net.addLayerToPrev(lp.name, lp.type, lp);
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
        net.setPreferableTarget(target);
        static int inpDims[] = {1, 2, 3, 4};
        net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0)));
        try
        {
            net.forward();
        }
        catch(const std::exception& e)
        {
            CV_LOG_INFO(NULL, "checkIETarget(" << (int)target << ") has failed with message: " << e.what());
            return false;
        }
        return true;
#endif
    }
#endif

private:
    BackendRegistry()
    {
#ifdef HAVE_HALIDE
        backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
#  ifdef HAVE_OPENCL
        if (cv::ocl::useOpenCL())
            backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
#  endif
#endif // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
        if (checkIETarget(DNN_TARGET_CPU)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_CPU));
#endif
#ifdef HAVE_DNN_NGRAPH
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
#endif
        }
        if (checkIETarget(DNN_TARGET_MYRIAD)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_MYRIAD));
#endif
#ifdef HAVE_DNN_NGRAPH
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD));
#endif
        }
        if (checkIETarget(DNN_TARGET_HDDL)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_HDDL));
#endif
#ifdef HAVE_DNN_NGRAPH
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL));
#endif
        }
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
        if (checkIETarget(DNN_TARGET_FPGA))
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_FPGA));
#endif
#ifdef HAVE_OPENCL
        if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
        {
            if (checkIETarget(DNN_TARGET_OPENCL)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL));
#endif
#ifdef HAVE_DNN_NGRAPH
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL));
#endif
            }
            if (checkIETarget(DNN_TARGET_OPENCL_FP16)) {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL_FP16));
#endif
#ifdef HAVE_DNN_NGRAPH
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16));
#endif
            }
        }
#endif
#endif // HAVE_INF_ENGINE

#ifdef HAVE_OPENCL
        if (cv::ocl::useOpenCL())
        {
            backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
            backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
        }
#endif

        backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));

#ifdef HAVE_VULKAN
        if (haveVulkan())
            backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN));
#endif

#ifdef HAVE_CUDA
        if (haveCUDA())
        {
            backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
            backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
        }
#endif
    }

    BackendsList backends;
};


std::vector< std::pair<Backend, Target> > getAvailableBackends()
{
    return BackendRegistry::getRegistry().getBackends();
}

std::vector<Target> getAvailableTargets(Backend be)
{
    if (be == DNN_BACKEND_DEFAULT)
        be = (Backend)PARAM_DNN_BACKEND_DEFAULT;
#ifdef HAVE_INF_ENGINE
    if (be == DNN_BACKEND_INFERENCE_ENGINE)
        be = getInferenceEngineBackendTypeParam();
#endif

    std::vector<Target> result;
    const BackendRegistry::BackendsList all_backends = getAvailableBackends();
    for(BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i )
    {
        if (i->first == be)
            result.push_back(i->second);
    }
    return result;
}
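
// Illustrative usage of the two functions above (values are examples only):
//
//     std::vector<Target> targets = getAvailableTargets(DNN_BACKEND_OPENCV);
//     // or enumerate every registered (backend, target) pair:
//     for (const auto& bt : getAvailableBackends())
//         std::cout << "backend=" << bt.first << " target=" << bt.second << std::endl;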

//==================================================================================================

namespace
{
    typedef std::vector<MatShape> ShapesVec;

    struct LayerShapes
    {
        ShapesVec in, out, internal;
        // There is no guarantee that a layer which supports in-place computation
        // will actually be computed in-place (input.data_ptr == output.data_ptr).
        // If the layer says it can work in-place and the layers after it
        // no longer use the input blob, we set output = input.
        bool supportInPlace;
        LayerShapes() {supportInPlace = false;}
    };
}

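// The blobFromImage*/blobFromImages* helpers below convert one or several images into a
// 4-dimensional NCHW blob, optionally resizing/cropping, swapping the R and B channels,
// subtracting a mean and scaling. A typical call (illustrative values only):
//
//     Mat blob = blobFromImage(img, 1.0 / 255, Size(224, 224), Scalar(), /*swapRB=*/true, /*crop=*/false);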
Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
                  const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat blob;
    blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
    return blob;
}

void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
                   const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> images(1, image.getMat());
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
}

Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
                   const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat blob;
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
    return blob;
}

void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
                    Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
    if (ddepth == CV_8U)
    {
        CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
        CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
    }

    std::vector<Mat> images;
    images_.getMatVector(images);
    CV_Assert(!images.empty());
    for (size_t i = 0; i < images.size(); i++)
    {
        Size imgSize = images[i].size();
        if (size == Size())
            size = imgSize;
        if (size != imgSize)
        {
            if(crop)
            {
              float resizeFactor = std::max(size.width / (float)imgSize.width,
                                            size.height / (float)imgSize.height);
              resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
              Rect crop(Point(0.5 * (images[i].cols - size.width),
                              0.5 * (images[i].rows - size.height)),
                        size);
              images[i] = images[i](crop);
            }
            else
              resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
        }
        if(images[i].depth() == CV_8U && ddepth == CV_32F)
            images[i].convertTo(images[i], CV_32F);
        Scalar mean = mean_;
        if (swapRB)
            std::swap(mean[0], mean[2]);

        images[i] -= mean;
        images[i] *= scalefactor;
    }

    size_t nimages = images.size();
    Mat image0 = images[0];
    int nch = image0.channels();
    CV_Assert(image0.dims == 2);
    if (nch == 3 || nch == 4)
    {
        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();
        Mat ch[4];

        for(size_t i = 0; i < nimages; i++ )
        {
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
            CV_Assert(image.size() == image0.size());

            for( int j = 0; j < nch; j++ )
                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
            if(swapRB)
                std::swap(ch[0], ch[2]);
            split(image, ch);
        }
    }
    else
    {
       CV_Assert(nch == 1);
       int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
       blob_.create(4, sz, ddepth);
       Mat blob = blob_.getMat();

       for(size_t i = 0; i < nimages; i++ )
       {
           const Mat& image = images[i];
           CV_Assert(image.depth() == blob_.depth());
           nch = image.channels();
           CV_Assert(image.dims == 2 && (nch == 1));
           CV_Assert(image.size() == image0.size());

           image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
       }
    }
}

void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
{
    CV_TRACE_FUNCTION();

    //A blob is a 4 dimensional matrix in floating point precision
    //blob_[0] = batchSize = nbOfImages
    //blob_[1] = nbOfChannels
    //blob_[2] = height
    //blob_[3] = width
    CV_Assert(blob_.depth() == CV_32F);
    CV_Assert(blob_.dims == 4);

    images_.create(cv::Size(1, blob_.size[0]), blob_.depth());

    std::vector<Mat> vectorOfChannels(blob_.size[1]);
    for (int n = 0; n <  blob_.size[0]; ++n)
    {
        for (int c = 0; c < blob_.size[1]; ++c)
        {
            vectorOfChannels[c] = getPlane(blob_, n, c);
        }
        cv::merge(vectorOfChannels, images_.getMatRef(n));
    }
}

#ifdef HAVE_OPENCL
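// Wraps a host cv::Mat in a cv::UMat for the OpenCL target of the OpenCV backend.
// Device data is refreshed lazily: setHostDirty() marks the host copy as newer and
// copyToDevice() re-uploads it only when needed; copyToHost() downloads results back.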
class OpenCLBackendWrapper : public BackendWrapper
{
public:
    OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
    {
        m.copyTo(umat);
        host = &m;
        hostDirty = false;
    }

    OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
        : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
    {
        Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
        CV_Assert(!base.empty());

        host = &m;

        int shape[] = {1, (int)base->umat.total()};
        umat = base->umat.reshape(1, 2, &shape[0])
                         .colRange(0, host->total())
                         .reshape(1, host->dims, &host->size[0]);
        hostDirty = false;
    }

    static Ptr<BackendWrapper> create(Mat& m)
    {
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
    }

    static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
    {
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
    }

    static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
    {
        const int numWrappers = wrappers.size();
        std::vector<UMat> mats(wrappers.size());
        for (int i = 0; i < numWrappers; ++i)
        {
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->copyToDevice();
            mats[i] = umatWrapper->umat;
        }
        return mats;
    }

    // Replaces all umats in the wrappers with the specified ones.
    static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
                       const std::vector<UMat>& umats)
    {
        CV_Assert(wrappers.size() == umats.size());
        for (int i = 0, n = umats.size(); i < n; ++i)
        {
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->umat = umats[i];
        }
    }

    ~OpenCLBackendWrapper() {}

    // Copies data from device to host memory.
    virtual void copyToHost() CV_OVERRIDE
    {
        umat.copyTo(*host);
    }

    virtual void setHostDirty() CV_OVERRIDE
    {
        hostDirty = true;
    };

    void copyToDevice()
    {
        if (hostDirty)
        {
            host->copyTo(umat);
            hostDirty = false;
        }
    }

private:
    UMat umat;
    Mat* host;
    bool hostDirty;
};
#endif

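// Identifies one output of one layer: lid is the layer id, oid is the output index
// within that layer. Used as a key for blob reference counting and memory reuse.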
struct LayerPin
{
    int lid;
    int oid;

    LayerPin(int layerId = -1, int outputId = -1)
        : lid(layerId), oid(outputId) {}

    bool valid() const
    {
        return (lid >= 0 && oid >= 0);
    }

    bool equal(const LayerPin &r) const
    {
        return (lid == r.lid && oid == r.oid);
    }

    bool operator<(const LayerPin &r) const
    {
        return lid < r.lid || (lid == r.lid && oid < r.oid);
    }

    bool operator ==(const LayerPin &r) const
    {
        return lid == r.lid && oid == r.oid;
    }
};

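// Per-layer bookkeeping used by Net::Impl: the layer parameters, the Layer instance,
// input/output/internal blobs and their backend-specific wrappers and nodes, plus graph
// connectivity (inputBlobsId, inputLayersId, consumers, requiredOutputs).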
struct LayerData
{
    LayerData() : id(-1), skip(false), flag(0) {}
    LayerData(int _id, const String &_name, const String &_type, LayerParams &_params)
        : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)
    {
        CV_TRACE_FUNCTION();

        //add logging info
        params.name = name;
        params.type = type;
    }

    int id;
    String name;
    String type;
    LayerParams params;

    std::vector<LayerPin> inputBlobsId;
    std::set<int> inputLayersId;
    std::set<int> requiredOutputs;
    std::vector<LayerPin> consumers;
    std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;

#ifdef HAVE_CUDA
    /* output ids which must be transferred to the host in the background
     * after the completion of the forward pass of the layer
     */
    std::vector<int> cudaD2HBackgroundTransfers;
#endif

    Ptr<Layer> layerInstance;
    std::vector<Mat> outputBlobs;
    std::vector<Mat*> inputBlobs;
    std::vector<Mat> internals;
    // Computation nodes of implemented backends (except DEFAULT).
    std::map<int, Ptr<BackendNode> > backendNodes;
    // Flag to skip this layer's computation for a specific backend.
    bool skip;

    int flag;

    Ptr<Layer> getLayerInstance()
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(type, "type", type.c_str());

        if (layerInstance)
            return layerInstance;

        layerInstance = LayerFactory::createLayerInstance(type, params);
        if (!layerInstance)
        {
            CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
        }

        return layerInstance;
    }
};

//fake layer containing network input blobs
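// Its forward() applies the per-input preprocessing (mean subtraction and scaling,
// populated from Net::setInput's scalefactor/mean arguments) while copying the
// user-provided data into the network's input blobs.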
struct DataLayer : public Layer
{
    DataLayer() : Layer()
    {
        skip = false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && inputsData.size() == 1);
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (outputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> outputs, internals;
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        // Supported modes:
        // | Input type | Output type |
        // |       fp32 |        fp32 |
        // |      uint8 |        fp32 |
        for (int i = 0; i < inputsData.size(); ++i)
        {
            double scale = scaleFactors[i];
            Scalar& mean = means[i];
            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");

            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (singleMean)
            {
                inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
            }
            else
            {
                for (int n = 0; n < inputsData[i].size[0]; ++n)
                    for (int c = 0; c < inputsData[i].size[1]; ++c)
                    {
                        Mat inp = getPlane(inputsData[i], n, c);
                        Mat out = getPlane(outputs[i], n, c);
                        inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                    }
            }
        }
    }

#ifdef HAVE_OPENCL
    std::vector<Mat> tmp_expressions;
    bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        // Supported modes:
        // | Input type | Output type |
        // |       fp32 |        fp32 |
        // |       fp32 |        fp16 |
        // |      uint8 |        fp32 |
        std::vector<UMat> outputs;
        outputs_.getUMatVector(outputs);

        tmp_expressions.clear();
        for (int i = 0; i < inputsData.size(); ++i)
        {
            Mat inputData = inputsData[i];

            double scale = scaleFactors[i];
            Scalar& mean = means[i];

            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (outputs_.depth() == CV_16S)
            {
                if (singleMean)
                {
                    tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
                    convertFp16(tmp_expressions.back(), outputs[i]);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            tmp_expressions.push_back(scale * (inp - mean[c]));
                            convertFp16(tmp_expressions.back(), out);
                        }
                }
            }
            else
            {
                CV_Assert(outputs_.depth() == CV_32F);
                if (singleMean)
                {
                    inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                        }
                }
            }
        }
        return true;
    }
#endif

    int outputNameToIndex(const String& tgtName) CV_OVERRIDE
    {
        int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
        return (idx < (int)outNames.size()) ? idx : -1;
    }

    void setNames(const std::vector<String> &names)
    {
        outNames.assign(names.begin(), names.end());
        shapes.clear(); shapes.resize(outNames.size());
    }

    void setInputShape(const String& tgtName, const MatShape& shape)
    {
        std::vector<String>::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName);
        CV_Check(tgtName, it != outNames.end(), "Unknown input");
        int idx = (int)(it - outNames.begin());

        CV_Assert(idx < (int)shapes.size());
        CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed");
        shapes[idx] = shape;
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == requiredOutputs);
        outputs.assign(inputs.begin(), inputs.end());
        return false;
    }

    virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);

        CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
                  inputsData.size() == outputs.size());
        skip = true;
        for (int i = 0; skip && i < inputsData.size(); ++i)
        {
            if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
                skip = false;
        }
    }

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        CV_CheckEQ(inputsData.size(), (size_t)1, "");
        CV_CheckEQ(inputsData[0].dims, 4, "");
        const size_t numChannels = inputsData[0].size[1];
        CV_Assert(numChannels <= 4);

        // Scale
        InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels},
                                       InferenceEngine::Layout::C);
        auto weights = InferenceEngine::make_shared_blob<float>(td);
        weights->allocate();

        float* weight_buf = weights->buffer().as<float*>();
        std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]);

        // Mean subtraction
        auto biases = InferenceEngine::make_shared_blob<float>(td);
        biases->allocate();
        float* bias_buf = biases->buffer().as<float*>();

        for (int i = 0; i < numChannels; ++i)
        {
            bias_buf[i] = -means[0][i] * scaleFactors[0];
        }

        InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
        addConstantData("weights", weights, ieLayer);
        addConstantData("biases", biases, ieLayer);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_DNN_IE_NN_BUILDER_2019

    std::vector<String> outNames;
    std::vector<MatShape> shapes;
    // Preprocessing parameters for each network's input.
    std::vector<double> scaleFactors;
    std::vector<Scalar> means;
    std::vector<Mat> inputsData;
    bool skip;
};

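// Tracks ownership and reuse of output/internal blobs across layers. Each blob is
// identified by a LayerPin; reference counting (addReference/releaseReference) lets
// reuseOrCreate() hand an already-allocated, no-longer-referenced buffer to a new
// layer instead of allocating fresh memory. Reuse is disabled entirely when the
// OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS parameter is set.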
struct BlobManager
{
public:
    // Increases the reference counter for a layer output.
    void addReference(const LayerPin& lp)
    {
        std::map<LayerPin, int>::iterator it = refCounter.find(lp);
        if (it == refCounter.end())
            refCounter[lp] = 1;
        else
            it->second += 1;
    }

    void addReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            addReference(pins[i]);
        }
    }

    // Returns the number of references to the allocated memory used by the
    // given layer blob.
    int numReferences(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());
        LayerPin memHost = mapIt->second;

        std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
        CV_Assert(refIt != refCounter.end());
        return refIt->second;
    }

    // Reuses the data allocated for <host> inside the <user> blob.
    void reuse(const LayerPin& host, const LayerPin& user)
    {
        CV_Assert(reuseMap.find(user) == reuseMap.end());
        CV_Assert(reuseMap.find(host) != reuseMap.end());
        LayerPin memHost = reuseMap[host];
        reuseMap[user] = memHost;
        if (refCounter.find(memHost) != refCounter.end())
        {
            std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
            if (userRefIt != refCounter.end())
            {
                refCounter[memHost] += userRefIt->second;
                refCounter.erase(userRefIt);
            }
            else
                refCounter[memHost] += 1;
        }
    }

    // Decreases the reference counter for the memory allocated for a specific blob.
    void releaseReference(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());

        std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
        CV_Assert(refIt != refCounter.end());
        CV_Assert(refIt->second > 0);
        refIt->second -= 1;
    }

    void releaseReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            releaseReference(pins[i]);
        }
    }

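    // Best-fit reuse: among blobs whose reference count has dropped to zero, pick the
    // smallest one that can still hold total(shape) elements and reshape a view of it;
    // otherwise allocate a new buffer (CV_16S is used as storage for fp16 blobs).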
    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
    {
        if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
        {
            Mat bestBlob;
            LayerPin bestBlobPin;

            std::map<LayerPin, Mat>::iterator hostIt;
            std::map<LayerPin, int>::iterator refIt;

            const int targetTotal = total(shape);
            int bestBlobTotal = INT_MAX;

            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
            {
                refIt = refCounter.find(hostIt->first);
                // Use only blobs that have had references before; otherwise the blob
                // might be used as an output.
                if (refIt != refCounter.end() && refIt->second == 0)
                {
                    Mat& unusedBlob = hostIt->second;
                    if (unusedBlob.total() >= targetTotal &&
                        unusedBlob.total() < bestBlobTotal)
                    {
                        bestBlobPin = hostIt->first;
                        bestBlob = unusedBlob;
                        bestBlobTotal = unusedBlob.total();
                    }
                }
            }
            if (!bestBlob.empty())
            {
                reuse(bestBlobPin, lp);
                dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
                return;
            }
        }

        {
            // If dst has already been allocated with total(shape) elements,
            // it won't be recreated and the dst.data pointer remains the same.
            dst.create(shape, use_half ? CV_16S : CV_32F);
            addHost(lp, dst);
        }
    }

    void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                               std::vector<LayerPin>& pinsForInternalBlobs,
                               bool use_half = false)
    {
        CV_TRACE_FUNCTION();

        pinsForInternalBlobs.clear();

        std::vector<Mat>& outputBlobs = ld.outputBlobs,
                &internalBlobs = ld.internals;

        const ShapesVec& outShapes = layerShapes.out,
                internalShapes = layerShapes.internal;

        outputBlobs.resize(std::max((size_t)1, outShapes.size())); // a layer produces at least one output blob
        internalBlobs.resize(internalShapes.size());

        CV_Assert(ld.requiredOutputs.size() <= outShapes.size());

        // Check whether the layer can work in-place.
        bool inPlace = false;
        if (layerShapes.supportInPlace)
        {
            if (ld.inputBlobs.size() == 1)
            {
                // Get the number of references to the input memory.
                int numRef = numReferences(ld.inputBlobsId[0]);
                // The current layer is the one and only consumer of this blob.
                inPlace = numRef == 1;
            }
        }

        ShapesVec shapes(outShapes);
        shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
        std::vector<Mat*> blobs;
        for(int i = 0; i < outputBlobs.size(); i++)
        {
            blobs.push_back(&outputBlobs[i]);
        }

        for(int i = 0; i < internalBlobs.size(); i++)
        {
            blobs.push_back(&internalBlobs[i]);
            if (total(internalShapes[i]))
            {
                pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
            }
        }

        addReferences(pinsForInternalBlobs);

        std::map<int, std::vector<int> > idxSizes;
        for(int i = 0; i < shapes.size(); i++)
        {
            idxSizes[total(shapes[i])].push_back(i);
        }

        std::map<int, std::vector<int> >::reverse_iterator it;
        for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
        {
            for(int j = 0; j < it->second.size(); j++)
            {
                int index = it->second[j];
                if (total(shapes[index]))
                {
                    LayerPin blobPin(ld.id, index);
                    if (index < outShapes.size() && inPlace)
                    {
                        CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                        ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                        reuse(ld.inputBlobsId[0], blobPin);
                    }
                    else
                        reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
                }
            }
        }
    }

    // Clears internal state. Called before every reallocation.
    void reset()
    {
        CV_TRACE_FUNCTION();

        refCounter.clear();
        reuseMap.clear();
        memHosts.clear();
    }

private:
    // Register allocated memory.
    void addHost(const LayerPin& lp, const Mat& mat)
    {
        CV_Assert(memHosts.find(lp) == memHosts.end());
        reuseMap[lp] = lp;
        memHosts[lp] = mat;
    }

    std::map<LayerPin, int> refCounter;
    // Maps a pin to its origin blob (for which the memory was originally allocated).
    // For origin blobs key == value.
    std::map<LayerPin, LayerPin> reuseMap;
    std::map<LayerPin, Mat> memHosts;
};

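// Wraps a host Mat into a backend-specific BackendWrapper for the requested
// backend/target pair; returns an empty pointer for the plain OpenCV CPU path.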
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
{
    if (backendId == DNN_BACKEND_OPENCV)
    {
        if (targetId == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();
#ifdef HAVE_OPENCL
        else if (IS_DNN_OPENCL_TARGET(targetId))
            return OpenCLBackendWrapper::create(m);
#endif
        else
            CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier");
    }
    else if (backendId == DNN_BACKEND_HALIDE)
    {
        CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
        return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
#endif  // HAVE_HALIDE
    }
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
        return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
#endif
    }
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
#ifdef HAVE_DNN_NGRAPH
        return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
#endif
    }
    else if (backendId == DNN_BACKEND_VKCOM)
    {
        CV_Assert(haveVulkan());
#ifdef HAVE_VULKAN
        return Ptr<BackendWrapper>(new VkComBackendWrapper(m));
#endif  // HAVE_VULKAN
    }
    else if (backendId == DNN_BACKEND_CUDA)
    {
        CV_Assert(haveCUDA());

#ifdef HAVE_CUDA
        switch (targetId)
        {
        case DNN_TARGET_CUDA:
            return CUDABackendWrapperFP32::create(m);
        case DNN_TARGET_CUDA_FP16:
            return CUDABackendWrapperFP16::create(m);
        default:
            CV_Assert(IS_DNN_CUDA_TARGET(targetId));
        }
#endif
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    return Ptr<BackendWrapper>();  // TODO Error?
}

static int g_networkId = 0;

detail::NetImplBase::NetImplBase()
    : networkId(CV_XADD(&g_networkId, 1))
    , networkDumpCounter(0)
    , dumpLevel(DNN_NETWORK_DUMP)
{
    // nothing
}

std::string detail::NetImplBase::getDumpFileNameBase()
{
    std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
    return dumpFileNameBase;
}

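// Internal implementation of cv::dnn::Net: owns the layer graph (layers, layerNameToId),
// the BlobManager, the preferable backend/target selection and the network
// allocation/initialization logic.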
struct Net::Impl : public detail::NetImplBase
{
    typedef std::map<int, LayerShapes> LayersShapesMap;
    typedef std::map<int, LayerData> MapIdToLayerData;

    Impl()
    {
        //allocate fake net input layer
        netInputLayer = Ptr<DataLayer>(new DataLayer());
        LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
        inpl.id = 0;
        netInputLayer->name = inpl.name = "_input";
        inpl.type = "__NetInputLayer__";
        inpl.layerInstance = netInputLayer;
        layerNameToId.insert(std::make_pair(inpl.name, inpl.id));

        lastLayerId = 0;
        netWasAllocated = false;
        fusion = true;
        isAsync = false;
        preferableBackend = DNN_BACKEND_DEFAULT;
        preferableTarget = DNN_TARGET_CPU;
        skipInfEngineInit = false;
        hasDynamicShapes = false;
    }

    Ptr<DataLayer> netInputLayer;
    std::vector<LayerPin> blobsToKeep;
    MapIdToLayerData layers;
    std::map<String, int> layerNameToId;
    BlobManager blobManager;
    int preferableBackend;
    int preferableTarget;
    String halideConfigFile;
    bool skipInfEngineInit;
    bool hasDynamicShapes;
    // Map host data to backend specific wrapper.
    std::map<void*, Ptr<BackendWrapper> > backendWrappers;

    int lastLayerId;

    bool netWasAllocated;
    bool fusion;
    bool isAsync;
    std::vector<int64> layersTimings;
    Mat output_blob;

#ifdef HAVE_CUDA
    struct CudaInfo_t
    {
        CudaInfo_t(cuda4dnn::csl::CSLContext ctxt, cuda4dnn::csl::Stream d2h_stream_)
         : context(std::move(ctxt)), d2h_stream(std::move(d2h_stream_)) { }
        cuda4dnn::csl::CSLContext context;
        cuda4dnn::csl::Stream d2h_stream;
        cuda4dnn::csl::Workspace workspace;
    };

    std::unique_ptr<CudaInfo_t> cudaInfo;
#endif

    Ptr<BackendWrapper> wrap(Mat& host)
    {
        if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();

        MatShape shape(host.dims);
        for (int i = 0; i < host.dims; ++i)
            shape[i] = host.size[i];

        void* data = host.data;
        if (backendWrappers.find(data) != backendWrappers.end())
        {
            Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
            if (preferableBackend == DNN_BACKEND_OPENCV)
            {
#ifdef HAVE_OPENCL
                CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
                return OpenCLBackendWrapper::create(baseBuffer, host);
#else
                CV_Error(Error::StsInternal, "");
#endif
            }
            else if (preferableBackend == DNN_BACKEND_HALIDE)
            {
                CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
                return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
#endif
            }
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
            {
                return wrapMat(preferableBackend, preferableTarget, host);
            }
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
            {
                return wrapMat(preferableBackend, preferableTarget, host);
            }
            else if (preferableBackend == DNN_BACKEND_VKCOM)
            {
  #ifdef HAVE_VULKAN
                return Ptr<BackendWrapper>(new VkComBackendWrapper(baseBuffer, host));
  #endif
            }
            else if (preferableBackend == DNN_BACKEND_CUDA)
            {
                CV_Assert(haveCUDA());
#ifdef HAVE_CUDA
                switch (preferableTarget)
                {
                case DNN_TARGET_CUDA:
                    return CUDABackendWrapperFP32::create(baseBuffer, shape);
                case DNN_TARGET_CUDA_FP16:
                    return CUDABackendWrapperFP16::create(baseBuffer, shape);
                default:
                    CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
                }
#endif
            }
            else
                CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
        }

        Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
        backendWrappers[data] = wrapper;
        return wrapper;
    }

#ifdef HAVE_HALIDE
    void compileHalide()
    {
        CV_TRACE_FUNCTION();

        CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);

        HalideScheduler scheduler(halideConfigFile);
        std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            Ptr<Layer> layer = ld.layerInstance;
            if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
            {
                CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
                bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
                if (!scheduled)
                {
                    // Use automatic scheduling provided by layer.
                    layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
                                                ld.inputBlobs, ld.outputBlobs,
                                                preferableTarget);
                }
                compileList.emplace_back(ld);
            }
        }
        std::atomic<int> progress(0);
        auto fn = ([&] () -> void
        {
            for (;;)
            {
                int id = progress.fetch_add(1);
                if ((size_t)id >= compileList.size())
                    return;
                const LayerData& ld = compileList[id].get();
                Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
                dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
            }
        });
        size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
        num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
        std::vector<std::thread> threads(num_threads - 1);
        for (auto& t: threads) t = std::thread(fn);
        fn(); // process own tasks
        for (auto& t: threads) t.join();
    }
#endif

    void clear()
    {
        CV_TRACE_FUNCTION();

        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
        {
            if (it->second.id != 0) {
                it->second.inputBlobs.clear();
                it->second.outputBlobs.clear();
                it->second.internals.clear();
            }
            it->second.skip = false;
            //it->second.consumers.clear();
            Ptr<Layer> currLayer = it->second.layerInstance;

            if( currLayer.empty() )
                continue;

            currLayer->unsetAttached();
        }

        layersTimings.clear();
    }

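    // Validates and, if necessary, downgrades the preferable backend/target combination
    // (e.g. falls back to the OpenCV CPU path when OpenCL, Vulkan or CUDA is unavailable),
    // then (re)allocates all layers and initializes the selected backend.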
setUpNetcv::dnn::Net::Impl1370     void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
1371     {
1372         CV_TRACE_FUNCTION();
1373 
1374         if (dumpLevel && networkDumpCounter == 0)
1375         {
1376             dumpNetworkToFile();
1377         }
1378 
1379         if (preferableBackend == DNN_BACKEND_DEFAULT)
1380             preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;
1381 #ifdef HAVE_INF_ENGINE
1382         if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
1383             preferableBackend = getInferenceEngineBackendTypeParam();
1384 #endif
1385 
1386         CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
1387                   preferableTarget == DNN_TARGET_CPU ||
1388                   preferableTarget == DNN_TARGET_OPENCL ||
1389                   preferableTarget == DNN_TARGET_OPENCL_FP16);
1390         CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
1391                   preferableTarget == DNN_TARGET_CPU ||
1392                   preferableTarget == DNN_TARGET_OPENCL);
1393 #ifdef HAVE_INF_ENGINE
1394         if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
1395             preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1396         {
1397             CV_Assert(
1398                   (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) ||
1399                   preferableTarget == DNN_TARGET_OPENCL ||
1400                   preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1401                   preferableTarget == DNN_TARGET_MYRIAD ||
1402                   preferableTarget == DNN_TARGET_HDDL ||
1403                   preferableTarget == DNN_TARGET_FPGA
1404             );
1405         }
1406 #endif
1407         CV_Assert(preferableBackend != DNN_BACKEND_VKCOM ||
1408                   preferableTarget == DNN_TARGET_VULKAN);
1409         CV_Assert(preferableBackend != DNN_BACKEND_CUDA ||
1410                   IS_DNN_CUDA_TARGET(preferableTarget));
1411         if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
1412         {
1413             if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
1414 #ifndef HAVE_OPENCL
1415             {
1416                 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
1417                 preferableTarget = DNN_TARGET_CPU;
1418             }
1419 #else
1420             {
1421                 if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
1422                 {
1423                     // Current implementation is only valid for GPU (#11494)
1424                     if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
1425                     {
1426                         CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
1427                         preferableTarget = DNN_TARGET_CPU;
1428                     }
1429                     else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
1430                     {
1431                         CV_LOG_WARNING(NULL,
1432                             "DNN: OpenCL target with fp16 precision is not supported "
1433                             "with current OpenCL device (tested with Intel GPUs only), "
1434                             "switching to OpenCL with fp32 precision.");
1435                         preferableTarget = DNN_TARGET_OPENCL;
1436                     }
1437                 }
1438             }
1439 #endif
1440             if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan())
1441             {
1442                 preferableBackend = DNN_BACKEND_OPENCV;
1443                 preferableTarget = DNN_TARGET_CPU;
1444             }
1445 
1446             if (preferableBackend == DNN_BACKEND_CUDA && !haveCUDA())
1447             {
1448 #ifdef HAVE_CUDA
1449                 CV_LOG_WARNING(NULL, "unable to use CUDA backend; switching to CPU");
1450 #else
1451                 CV_LOG_WARNING(NULL, "DNN module was not built with CUDA backend; switching to CPU");
1452 #endif
1453                 preferableBackend = DNN_BACKEND_OPENCV;
1454                 preferableTarget = DNN_TARGET_CPU;
1455             }
1456 
1457             clear();
1458 
1459             this->blobsToKeep = blobsToKeep_;
1460 
1461             allocateLayers(blobsToKeep_);
1462 
1463             MapIdToLayerData::iterator it = layers.find(0);
1464             CV_Assert(it != layers.end());
1465             it->second.skip = netInputLayer->skip;
1466 
1467             initBackend(blobsToKeep_);
1468 
1469             if (!netWasAllocated)
1470             {
1471 #ifdef HAVE_HALIDE
1472                 if (preferableBackend == DNN_BACKEND_HALIDE)
1473                     compileHalide();
1474 #else
1475                 CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
1476 #endif
1477             }
1478 
1479             netWasAllocated = true;
1480 
1481             if (dumpLevel)
1482             {
1483                 dumpNetworkToFile();
1484             }
1485         }
1486     }
1487 
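    // Lookup helpers: translate a layer name, numeric id or DictValue descriptor into a
    // layer id (-1 if not found) and fetch the corresponding LayerData record.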
1488     int getLayerId(const String &layerName)
1489     {
1490         std::map<String, int>::iterator it = layerNameToId.find(layerName);
1491         return (it != layerNameToId.end()) ? it->second : -1;
1492     }
1493 
1494     int getLayerId(int id)
1495     {
1496         MapIdToLayerData::iterator it = layers.find(id);
1497         return (it != layers.end()) ? id : -1;
1498     }
1499 
1500     int getLayerId(DictValue &layerDesc)
1501     {
1502         if (layerDesc.isInt())
1503             return getLayerId(layerDesc.get<int>());
1504         else if (layerDesc.isString())
1505             return getLayerId(layerDesc.get<String>());
1506 
1507         CV_Assert(layerDesc.isInt() || layerDesc.isString());
1508         return -1;
1509     }
1510 
1511     String getLayerName(int id)
1512     {
1513         MapIdToLayerData::iterator it = layers.find(id);
1514         return (it != layers.end()) ? it->second.name : "(unknown layer)";
1515     }
1516 
1517     LayerData& getLayerData(int id)
1518     {
1519         MapIdToLayerData::iterator it = layers.find(id);
1520 
1521         if (it == layers.end())
1522             CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
1523 
1524         return it->second;
1525     }
1526 
1527     LayerData& getLayerData(const String &layerName)
1528     {
1529         int id = getLayerId(layerName);
1530 
1531         if (id < 0)
1532             CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");
1533 
1534         return getLayerData(id);
1535     }
1536 
1537     LayerData& getLayerData(const DictValue &layerDesc)
1538     {
1539         CV_Assert(layerDesc.isInt() || layerDesc.isString());
1540         if (layerDesc.isInt())
1541             return getLayerData(layerDesc.get<int>());
1542         else /*if (layerDesc.isString())*/
1543             return getLayerData(layerDesc.get<String>());
1544     }
1545 
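    // Registers `from` as the producer of input #inNum of layer `ld`, growing the input
    // list if needed and raising an error if that input slot is already connected elsewhere.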
1546     static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
1547     {
1548         if ((int)ld.inputBlobsId.size() <= inNum)
1549         {
1550             ld.inputBlobsId.resize(inNum + 1);
1551         }
1552         else
1553         {
1554             LayerPin storedFrom = ld.inputBlobsId[inNum];
1555             if (storedFrom.valid() && !storedFrom.equal(from))
1556                 CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected",
1557                                                  inNum, ld.name.c_str()));
1558         }
1559 
1560         ld.inputBlobsId[inNum] = from;
1561     }
1562 
1563     int resolvePinOutputName(LayerData &ld, const String &outName)
1564     {
1565         if (outName.empty())
1566             return 0;
1567         return ld.getLayerInstance()->outputNameToIndex(outName);
1568     }
1569 
1570     LayerPin getPinByAlias(const String &layerName)
1571     {
1572         LayerPin pin;
1573         pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1574 
1575         if (pin.lid >= 0)
1576             pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
1577 
1578         return pin;
1579     }
1580 
1581     std::vector<LayerPin> getLayerOutPins(const String &layerName)
1582     {
1583         int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1584 
1585         std::vector<LayerPin> pins;
1586 
1587         for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
1588         {
1589             pins.push_back(LayerPin(lid, i));
1590         }
1591 
1592         return pins;
1593     }
1594 
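    // Connects output pin #outNum of layer `outLayerId` to input pin #inNum of layer
    // `inLayerId`; producers are expected to precede consumers (outLayerId < inLayerId).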
1595     void connect(int outLayerId, int outNum, int inLayerId, int inNum)
1596     {
1597         CV_Assert(outLayerId < inLayerId);
1598         LayerData &ldOut = getLayerData(outLayerId);
1599         LayerData &ldInp = getLayerData(inLayerId);
1600 
1601         addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
1602         ldOut.requiredOutputs.insert(outNum);
1603         ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
1604     }
1605 
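    // Dispatches backend-specific graph construction (Halide, Inference Engine NN Builder,
    // nGraph, Vulkan or CUDA) according to preferableBackend; the OpenCV backend needs no
    // extra initialization here.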
1606     void initBackend(const std::vector<LayerPin>& blobsToKeep_)
1607     {
1608         CV_TRACE_FUNCTION();
1609         if (preferableBackend == DNN_BACKEND_OPENCV)
1610         {
1611             CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
1612         }
1613         else if (preferableBackend == DNN_BACKEND_HALIDE)
1614             initHalideBackend();
1615         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1616         {
1617 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1618             initInfEngineBackend(blobsToKeep_);
1619 #else
1620             CV_Assert(false && "This OpenCV version is built without Inference Engine NN Builder API support");
1621 #endif
1622         }
1623         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1624         {
1625 #ifdef HAVE_DNN_NGRAPH
1626             initNgraphBackend(blobsToKeep_);
1627 #else
1628             CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
1629 #endif
1630         }
1631         else if (preferableBackend == DNN_BACKEND_VKCOM)
1632             initVkComBackend();
1633         else if (preferableBackend == DNN_BACKEND_CUDA)
1634             initCUDABackend(blobsToKeep_);
1635         else
1636             CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1637     }
1638 
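    // Builds Halide nodes layer by layer; when a layer runs in-place on the output of the
    // previous Halide node, it is attached (fused) to that node instead of creating a new one.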
1639     void initHalideBackend()
1640     {
1641         CV_TRACE_FUNCTION();
1642         CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());
1643 
1644         // Iterator to the current layer.
1645         MapIdToLayerData::iterator it = layers.begin();
1646         // Iterator to the base layer for fusion. For example, in case of conv+bn+relu
1647         // it'll be the conv layer.
1648         MapIdToLayerData::iterator baseIt = layers.begin();
1649         for (; it != layers.end(); it++)
1650         {
1651             LayerData &ldTop = it->second;
1652             Ptr<Layer> layerTop = ldTop.layerInstance;
1653             if (!layerTop->supportBackend(preferableBackend))
1654             {
1655                 // Move the base iterator to a layer that doesn't support the preferable
1656                 // backend to prevent fusion across layers from different backends.
1657                 baseIt = it;
1658                 continue;
1659             }
1660             // Try to do layers fusion.
1661             LayerData &ldBot = baseIt->second;
1662             Ptr<Layer> layerBot = ldBot.layerInstance;
1663             // 1. Check that the bottom and top layers use the same backend.
1664             if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
1665             {
1666                 // 2. Check that current layer works in-place.
1667                 bool inPlace = ldTop.inputBlobs.size() == 1 &&
1668                                ldBot.outputBlobs.size() == 1 &&
1669                                ldTop.inputBlobs[0]->data ==
1670                                ldBot.outputBlobs[0].data;
1671                 if (inPlace)
1672                 {
1673                     // 3. Try to attach node.
1674                     CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
1675                     Ptr<BackendNode> fusedNode =
1676                         layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
1677                     if (!fusedNode.empty())
1678                     {
1679                         ldTop.skip = true;
1680                         ldBot.backendNodes[preferableBackend] = fusedNode;
1681                         ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
1682                         continue;
1683                     }
1684                 }
1685             }
1686             // No layers fusion.
1687             ldTop.skip = false;
1688             ldTop.backendNodes[DNN_BACKEND_HALIDE] =
1689                 layerTop->initHalide(ldTop.inputBlobsWrappers);
1690             baseIt = it;
1691         }
1692     }
1693 
1694 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1695     // Before launching an Inference Engine graph we need to specify output blobs.
1696     // This function requests output blobs based on the input references of
1697     // layers from the default backend or layers from different graphs.
1698     void addInfEngineNetOutputs(LayerData &ld)
1699     {
1700         CV_TRACE_FUNCTION();
1701         Ptr<InfEngineBackendNet> layerNet;
1702         if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
1703         {
1704             Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1705             if (!node.empty())
1706             {
1707                 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1708                 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1709                 layerNet = ieNode->net;
1710             }
1711         }
1712         // For every input reference we check whether it belongs to one of
1713         // the Inference Engine backend graphs. Request an output blob if it does.
1714         // Do nothing if the layer's input is from the same graph.
1715         for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1716         {
1717             LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1718             Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1719             if (!inpNode.empty())
1720             {
1721                 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1722                 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1723                 if (layerNet != ieInpNode->net)
1724                 {
1725                     // layerNet is empty or nodes are from different graphs.
1726                     ieInpNode->net->addOutput(ieInpNode->layer.getName());
1727                 }
1728             }
1729         }
1730     }
1731 
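    // Builds NN Builder API networks from runs of consecutive layers supported by the IE
    // backend; unsupported layers either run as IE custom layers (CPU fallback) or split
    // the model into several independent IE sub-networks.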
1732     void initInfEngineBackend(const std::vector<LayerPin>& blobsToKeep_)
1733     {
1734         CV_TRACE_FUNCTION();
1735         CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine());
1736         MapIdToLayerData::iterator it;
1737         Ptr<InfEngineBackendNet> net;
1738 
1739         for (it = layers.begin(); it != layers.end(); ++it)
1740         {
1741             LayerData &ld = it->second;
1742             if (ld.id == 0)
1743             {
1744                 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
1745                           (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
1746                 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1747                 {
1748                     InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1749 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1750                     dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
1751 #else
1752                     dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]);
1753 #endif
1754                 }
1755             }
1756             else
1757             {
1758                 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1759                 {
1760                     InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1761 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1762                     dataPtr->name = ld.name;
1763 #else
1764                     dataPtr->setName(ld.name);
1765 #endif
1766                 }
1767             }
1768         }
1769 
1770         if (skipInfEngineInit)
1771         {
1772             Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
1773             CV_Assert(!node.empty());
1774 
1775             Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1776             CV_Assert(!ieNode.empty());
1777             ieNode->net->reset();
1778 
1779             for (it = layers.begin(); it != layers.end(); ++it)
1780             {
1781                 LayerData &ld = it->second;
1782                 if (ld.id == 0)
1783                 {
1784                     for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
1785                     {
1786                         InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
1787 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1788                         dataPtr->name = netInputLayer->outNames[i];
1789 #else
1790                         dataPtr->setName(netInputLayer->outNames[i]);
1791 #endif
1792                     }
1793                 }
1794                 else
1795                 {
1796                     for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1797                     {
1798                         InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1799 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1800                         dataPtr->name = ld.name;
1801 #else
1802                         dataPtr->setName(ld.name);
1803 #endif
1804                     }
1805                 }
1806                 ieNode->net->addBlobs(ld.inputBlobsWrappers);
1807                 ieNode->net->addBlobs(ld.outputBlobsWrappers);
1808                 ld.skip = true;
1809             }
1810             layers[lastLayerId].skip = false;
1811             ieNode->net->init((Target)preferableTarget);
1812             return;
1813         }
1814 
1815         // Build Inference Engine networks from sets of layers that support this
1816         // backend. Split the whole model into several Inference Engine networks if
1817         // some of the layers are not implemented.
1818 
1819         bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
1820                                    BackendRegistry::checkIETarget(DNN_TARGET_CPU);
1821 
1822         // Set of all input and output blobs wrappers for current network.
1823         std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
1824         for (it = layers.begin(); it != layers.end(); ++it)
1825         {
1826             LayerData &ld = it->second;
1827             if (ld.id == 0 && ld.skip)
1828                 continue;
1829             bool fused = ld.skip;
1830 
1831             Ptr<Layer> layer = ld.layerInstance;
1832             if (!fused && !layer->supportBackend(preferableBackend))
1833             {
1834                 bool customizable = ld.id != 0 &&
1835                                     INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) &&
1836                                     supportsCPUFallback;
1837                 // TODO: there is a bug in Myriad plugin with custom layers shape infer.
1838                 if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
1839                 {
1840                     for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
1841                     {
1842                         customizable = ld.inputBlobs[i]->size[0] == 1;
1843                     }
1844                 }
1845 
1846                 // TODO: fix these workarounds
1847                 if (preferableTarget == DNN_TARGET_MYRIAD ||
1848                     preferableTarget == DNN_TARGET_HDDL ||
1849                     preferableTarget == DNN_TARGET_OPENCL ||
1850                     preferableTarget == DNN_TARGET_OPENCL_FP16)
1851                     customizable &= ld.type != "Concat";
1852 
1853                 if (preferableTarget == DNN_TARGET_OPENCL ||
1854                     preferableTarget == DNN_TARGET_OPENCL_FP16)
1855                     customizable &= ld.type != "Power";
1856 
1857                 if (preferableTarget == DNN_TARGET_OPENCL)
1858                     customizable &= ld.type != "Eltwise";
1859 
1860                 if (!customizable)
1861                 {
1862                     addInfEngineNetOutputs(ld);
1863                     net = Ptr<InfEngineBackendNet>();
1864                     netBlobsWrappers.clear();  // Not needed since the R5 release, but we don't wrap it in #ifdef.
1865                     layer->preferableTarget = DNN_TARGET_CPU;
1866                     continue;
1867                 }
1868             }
1869             ld.skip = true;  // Initially skip all Inference Engine supported layers.
1870 
1871             // Create a new network if one of the inputs comes from a different Inference Engine graph.
1872             for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1873             {
1874                 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1875                 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1876                 if (!inpNode.empty())
1877                 {
1878                     Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1879                     CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1880                     if (ieInpNode->net != net)
1881                     {
1882                         net = Ptr<InfEngineBackendNet>();
1883                         netBlobsWrappers.clear();  // Not needed since the R5 release, but we don't wrap it in #ifdef.
1884                         break;
1885                     }
1886                 }
1887             }
1888 
1889             Ptr<BackendNode> node;
1890             if (!net.empty())
1891             {
1892                 if (fused)
1893                 {
1894                     bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
1895                                    ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
1896                     CV_Assert(inPlace);
1897                     node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
1898                     ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
1899                 }
1900             }
1901             else
1902                 net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());
1903 
1904             if (!fused)
1905             {
1906                 if (layer->supportBackend(preferableBackend))
1907                     node = layer->initInfEngine(ld.inputBlobsWrappers);
1908                 else
1909                 {
1910                     node = Ptr<BackendNode>(new InfEngineBackendNode(
1911                         ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
1912                 }
1913             }
1914             else if (node.empty())
1915                 continue;
1916 
1917             CV_Assert(!node.empty());
1918             ld.backendNodes[preferableBackend] = node;
1919 
1920             Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1921             CV_Assert(!ieNode.empty());
1922             ieNode->net = net;
1923 
1924             for (const auto& pin : blobsToKeep_)
1925             {
1926                 if (pin.lid == ld.id)
1927                 {
1928                     ieNode->net->addOutput(ieNode->layer.getName());
1929                     break;
1930                 }
1931             }
1932 
1933             // Convert weights to FP16 for specific targets.
1934             if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1935                  preferableTarget == DNN_TARGET_MYRIAD ||
1936                  preferableTarget == DNN_TARGET_HDDL ||
1937                  preferableTarget == DNN_TARGET_FPGA) && !fused)
1938             {
1939 #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
1940                 for (const std::string& name : {"weights", "biases"})
1941                 {
1942                     auto it = ieNode->layer.getParameters().find(name);
1943                     if (it != ieNode->layer.getParameters().end())
1944                     {
1945                         InferenceEngine::Blob::Ptr bp = it->second.as<InferenceEngine::Blob::Ptr>();
1946                         it->second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(bp));
1947                     }
1948                 }
1949 #else
1950                 auto& blobs = ieNode->layer.getConstantData();
1951                 if (blobs.empty())
1952                 {
1953                     // In case of a non-weightable layer we have to specify
1954                     // its precision by adding a dummy blob.
1955                     auto blob = InferenceEngine::make_shared_blob<int16_t>(
1956                                     InferenceEngine::Precision::FP16,
1957                                     InferenceEngine::Layout::C, {1});
1958                     blob->allocate();
1959                     blobs[""] = blob;
1960                 }
1961                 else
1962                 {
1963                     for (auto& it : blobs)
1964                         it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));
1965                 }
1966 #endif
1967             }
1968 
1969             if (!fused)
1970                 net->addLayer(ieNode->layer);
1971 
1972             net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
1973             net->addBlobs(ld.inputBlobsWrappers);
1974             net->addBlobs(ld.outputBlobsWrappers);
1975             addInfEngineNetOutputs(ld);
1976         }
1977 
1978         // Initialize all networks.
1979         for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
1980         {
1981             LayerData &ld = it->second;
1982             if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
1983                 continue;
1984 
1985             Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1986             if (node.empty())
1987                 continue;
1988 
1989             Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1990             if (ieNode.empty())
1991                 continue;
1992 
1993             CV_Assert(!ieNode->net.empty());
1994 
1995             if (!ieNode->net->isInitialized())
1996             {
1997                 ieNode->net->init((Target)preferableTarget);
1998                 ld.skip = false;
1999             }
2000         }
2001     }
2002 #endif  // HAVE_DNN_IE_NN_BUILDER_2019
2003 
2004 
2005 #ifdef HAVE_DNN_NGRAPH
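    // nGraph analogue of addInfEngineNetOutputs(): when a layer's input comes from a
    // different nGraph graph (or from the default backend), request that input as an
    // output of its producing graph.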
2006     void addNgraphOutputs(LayerData &ld)
2007     {
2008         CV_TRACE_FUNCTION();
2009 
2010         Ptr<InfEngineNgraphNet> layerNet;
2011         auto it = ld.backendNodes.find(preferableBackend);
2012         if (it != ld.backendNodes.end())
2013         {
2014             Ptr<BackendNode> node = it->second;
2015             if (!node.empty())
2016             {
2017                 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2018                 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
2019                 layerNet = ieNode->net;
2020             }
2021         }
2022 
2023         for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2024         {
2025             LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2026             Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2027             if (!inpNode.empty())
2028             {
2029                 Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2030                 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
2031                 if (layerNet != ieInpNode->net)
2032                 {
2033                     ieInpNode->net->addOutput(ieInpNode->node->get_friendly_name());
2034                     ieInpNode->net->setUnconnectedNodes(ieInpNode);
2035                 }
2036             }
2037         }
2038     }
2039 
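    // nGraph counterpart of initInfEngineBackend(): wraps runs of supported layers into
    // InfEngineNgraphNet graphs, registers their inputs/outputs and finally builds every
    // constructed graph with createNet().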
2040     void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
2041     {
2042         CV_TRACE_FUNCTION();
2043         CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine());
2044 
2045         MapIdToLayerData::iterator it;
2046         Ptr<InfEngineNgraphNet> net;
2047 
2048         for (it = layers.begin(); it != layers.end(); ++it)
2049         {
2050             LayerData &ld = it->second;
2051             if (ld.id == 0)
2052             {
2053                 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
2054                           (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
2055                 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2056                 {
2057                     InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2058                     std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
2059                     outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName;
2060                     dataPtr->setName(outputName);
2061                 }
2062             }
2063             else
2064             {
2065                 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2066                 {
2067                     InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2068                     std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name;
2069                     dataPtr->setName(outputName);
2070                 }
2071             }
2072         }
2073 
2074         if (skipInfEngineInit)
2075         {
2076             Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
2077             CV_Assert(!node.empty());
2078 
2079             Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2080             CV_Assert(!ieNode.empty());
2081             ieNode->net->reset();
2082 
2083             for (it = layers.begin(); it != layers.end(); ++it)
2084             {
2085                 LayerData &ld = it->second;
2086                 if (ld.id == 0)
2087                 {
2088                     for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
2089                     {
2090                         InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]);
2091                         dataPtr->setName(netInputLayer->outNames[i]);
2092                     }
2093                 }
2094                 else
2095                 {
2096                     for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2097                     {
2098                         InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2099                         dataPtr->setName(ld.name);
2100                     }
2101                 }
2102                 ieNode->net->addBlobs(ld.inputBlobsWrappers);
2103                 ieNode->net->addBlobs(ld.outputBlobsWrappers);
2104                 ld.skip = true;
2105             }
2106             layers[lastLayerId].skip = false;
2107             ieNode->net->init((Target)preferableTarget);
2108             return;
2109         }
2110 
2111         bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU ||
2112                                    BackendRegistry::checkIETarget(DNN_TARGET_CPU));
2113 
2114         // Build Inference Engine networks from sets of layers that support this
2115         // backend. Split the whole model into several Inference Engine networks if
2116         // some of the layers are not implemented.
2117         for (it = layers.begin(); it != layers.end(); ++it)
2118         {
2119             LayerData &ld = it->second;
2120 
2121             if (ld.id == 0 && ld.skip)
2122                 continue;
2123 
2124             bool fused = ld.skip;
2125             Ptr<Layer> layer = ld.layerInstance;
2126             if (!fused && !layer->supportBackend(preferableBackend))
2127             {
2128                 bool customizable = ld.id != 0 && supportsCPUFallback;
2129 
2130                 // TODO: there is a bug in Myriad plugin with custom layers shape infer.
2131                 if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
2132                 {
2133                     for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
2134                     {
2135                         customizable = ld.inputBlobs[i]->size[0] == 1;
2136                     }
2137                 }
2138 
2139                 // TODO: fix these workarounds
2140                 if (preferableTarget == DNN_TARGET_MYRIAD ||
2141                     preferableTarget == DNN_TARGET_HDDL ||
2142                     preferableTarget == DNN_TARGET_OPENCL ||
2143                     preferableTarget == DNN_TARGET_OPENCL_FP16)
2144                     customizable &= ld.type != "Concat";
2145 
2146                 if (preferableTarget == DNN_TARGET_OPENCL ||
2147                     preferableTarget == DNN_TARGET_OPENCL_FP16)
2148                     customizable &= ld.type != "Power";
2149 
2150                 if (preferableTarget == DNN_TARGET_OPENCL)
2151                     customizable &= ld.type != "Eltwise";
2152 
2153                 if (!customizable)
2154                 {
2155                     addNgraphOutputs(ld);
2156                     net = Ptr<InfEngineNgraphNet>();
2157                     layer->preferableTarget = DNN_TARGET_CPU;
2158 
2159                     for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2160                     {
2161                         LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2162                         Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2163                         if (!inpNode.empty()) {
2164                             Ptr<InfEngineNgraphNode> ieNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2165                             CV_Assert(!ieNode.empty());
2166                             ieNode->net->setUnconnectedNodes(ieNode);
2167                         }
2168                     }
2169                     continue;
2170                 }
2171             }
2172             ld.skip = true;  // Initially skip all Inference Engine supported layers.
2173 
2174             // Create a new network if one of the inputs comes from a different Inference Engine graph.
2175             std::vector<Ptr<BackendNode>> inputNodes;
2176             for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2177             {
2178                 // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois)
2179                 if (inputNodes.size() == ld.inputBlobsId.size()) {
2180                     break;
2181                 }
2182                 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2183                 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2184                 if (!inpNode.empty())
2185                 {
2186                      Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2187                      CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
2188                      if (ieInpNode->net == net && !fused) {
2189                         inputNodes.push_back(inpNode);
2190                         continue;
2191                      }
2192                 }
2193 
2194                 if (net.empty()) {
2195                     net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
2196                 }
2197 
2198                 if (!fused) {
2199                     std::vector<std::string> inputNames;
2200                     std::vector<cv::Mat> inputs;
2201 
2202                     auto curr_pos = inpLd.consumers.begin();
2203                     auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; };
2204                     auto cons = curr_pos;
2205                     while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) !=
2206                             inpLd.consumers.end()) {
2207                         int cons_inp = cons->oid;
2208                         Ptr<NgraphBackendWrapper> inpWrapper = inpLd.outputBlobsWrappers[cons_inp].
2209                                                                      dynamicCast<NgraphBackendWrapper>();
2210                         CV_Assert(!inpWrapper.empty());
2211                         auto iter = std::find(inputNames.begin(), inputNames.end(),
2212                                               inpWrapper->dataPtr->getName());
2213                         if (iter == inputNames.end()) {
2214                             inputNames.push_back(inpWrapper->dataPtr->getName());
2215                             inputs.push_back(inpLd.outputBlobs[cons_inp]);
2216                         }
2217                         curr_pos = cons + 1;
2218                     }
2219 
2220                     auto inps = net->setInputs(inputs, inputNames);
2221                     for (auto& inp : inps) {
2222                         inputNodes.emplace_back(Ptr<BackendNode>(new InfEngineNgraphNode(inp)));
2223                     }
2224                 }
2225             }
2226 
2227             Ptr<BackendNode> node;
2228             if (!net.empty())
2229             {
2230                 if (fused)
2231                 {
2232                     bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
2233                                    ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
2234                     CV_Assert(inPlace);
2235                     node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
2236                     ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
2237                 }
2238             }
2239             else {
2240                 net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
2241             }
2242 
2243             if (!fused)
2244             {
2245                 CV_Assert(ld.inputBlobsId.size() == inputNodes.size());
2246                 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2247                 {
2248                     int lid = ld.inputBlobsId[i].lid;
2249                     int oid = ld.inputBlobsId[i].oid;
2250                     if (oid == 0 || lid == 0)
2251                         continue;
2252 
2253                     auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
2254                     CV_Assert(oid < ieInpNode->node->get_output_size());
2255 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
2256                     inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node));
2257 #elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3)
2258                     inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid)));
2259 #else
2260                     inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false)));
2261 #endif
2262                 }
2263 
2264                 if (layer->supportBackend(preferableBackend))
2265                 {
2266                     node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes);
2267                     for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2268                     {
2269                         InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2270                         node.dynamicCast<InfEngineNgraphNode>()->setName(dataPtr->getName());
2271                     }
2272                 }
2273                 else
2274                 {
2275                     node = Ptr<BackendNode>(new InfEngineNgraphNode(inputNodes,
2276                         ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
2277                 }
2278             }
2279             else if (node.empty())
2280                 continue;
2281 
2282             ld.backendNodes[preferableBackend] = node;
2283 
2284             Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2285             CV_Assert(!ieNode.empty());
2286             ieNode->net = net;
2287 
2288             if (ld.consumers.empty()) {
2289                 // TF EAST_text_detection
2290                 ieNode->net->setUnconnectedNodes(ieNode);
2291             }
2292             for (const auto& pin : blobsToKeep_)
2293             {
2294                 if (pin.lid == ld.id)
2295                 {
2296                     ieNode->net->addOutput(ieNode->node->get_friendly_name());
2297                     break;
2298                 }
2299             }
2300             ieNode->net->setNodePtr(&ieNode->node);
2301 
2302             net->addBlobs(ld.inputBlobsWrappers);
2303             net->addBlobs(ld.outputBlobsWrappers);
2304             addNgraphOutputs(ld);
2305         }
2306 
2307         // Initialize all networks.
2308         for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
2309         {
2310             LayerData &ld = it->second;
2311             auto iter = ld.backendNodes.find(preferableBackend);
2312             if (iter == ld.backendNodes.end())
2313                 continue;
2314 
2315             Ptr<BackendNode>& node = iter->second;
2316             if (node.empty())
2317                 continue;
2318 
2319             Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2320             if (ieNode.empty())
2321                 continue;
2322 
2323             CV_Assert(!ieNode->net.empty());
2324 
2325             if (!ieNode->net->isInitialized())
2326             {
2327                 ieNode->net->setUnconnectedNodes(ieNode);
2328                 ieNode->net->createNet((Target)preferableTarget);
2329                 ld.skip = false;
2330             }
2331         }
2332     }
2333 #endif  // HAVE_DNN_NGRAPH
2334 
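    // Creates Vulkan backend nodes for every supported layer; if initVkCom() throws, the
    // node stays empty and the layer falls back to the CPU implementation (an error is logged).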
2335     void initVkComBackend()
2336     {
2337         CV_TRACE_FUNCTION();
2338         CV_Assert(preferableBackend == DNN_BACKEND_VKCOM);
2339 #ifdef HAVE_VULKAN
2340         if (!haveVulkan())
2341             return;
2342 
2343         MapIdToLayerData::iterator it = layers.begin();
2344         for (; it != layers.end(); it++)
2345         {
2346             LayerData &ld = it->second;
2347             Ptr<Layer> layer = ld.layerInstance;
2348             if (!layer->supportBackend(preferableBackend))
2349             {
2350                 continue;
2351             }
2352 
2353             ld.skip = false;
2354 
2355             try
2356             {
2357                 ld.backendNodes[DNN_BACKEND_VKCOM] =
2358                     layer->initVkCom(ld.inputBlobsWrappers);
2359             }
2360             catch (const cv::Exception& e)
2361             {
2362                 CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what());
2363                 ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>();
2364             }
2365         }
2366 #endif
2367     }
2368 
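    // Verifies CUDA device availability and compatibility, lazily creates the shared CSL
    // context (stream, cuBLAS/cuDNN handles, background D2H stream), then builds a CUDA
    // backend node for every layer that supports DNN_BACKEND_CUDA.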
2369     void initCUDABackend(const std::vector<LayerPin>& blobsToKeep_)
2370     {
2371         CV_Assert(haveCUDA());
2372         CV_Assert(preferableBackend == DNN_BACKEND_CUDA);
2373 
2374 #ifdef HAVE_CUDA
2375         if (!cudaInfo) /* we need to check only once */
2376             cuda4dnn::checkVersions();
2377 
2378         if (cuda4dnn::getDeviceCount() <= 0)
2379             CV_Error(Error::StsError, "No CUDA capable device found.");
2380 
2381         if (cuda4dnn::getDevice() < 0)
2382             CV_Error(Error::StsError, "No CUDA capable device selected.");
2383 
2384         if (!cuda4dnn::isDeviceCompatible())
2385             CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration.");
2386 
2387         if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16())
2388         {
2389             CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target.");
2390             preferableTarget = DNN_TARGET_CUDA;
2391         }
2392 
2393         if (!cudaInfo)
2394         {
2395             cuda4dnn::csl::CSLContext context;
2396             context.stream = cuda4dnn::csl::Stream(true);
2397             context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream);
2398             context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream);
2399 
2400             auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers
2401             cudaInfo = std::unique_ptr<CudaInfo_t>(new CudaInfo_t(std::move(context), std::move(d2h_stream)));
2402         }
2403 
2404         cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any
2405 
2406         for (auto& layer : layers)
2407         {
2408             auto& ld = layer.second;
2409             if (ld.id == 0)
2410             {
2411                 for (auto& wrapper : ld.inputBlobsWrappers)
2412                 {
2413                     auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
2414                     cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
2415                 }
2416             }
2417 
2418             for (auto& wrapper : ld.outputBlobsWrappers)
2419             {
2420                 auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
2421                 cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
2422             }
2423         }
2424 
2425         for (auto& layer : layers)
2426         {
2427             auto& ld = layer.second;
2428             auto& layerInstance = ld.layerInstance;
2429 
2430             if (!layerInstance->supportBackend(DNN_BACKEND_CUDA))
2431             {
2432                 std::ostringstream os;
2433                 os << "CUDA backend will fall back to the CPU implementation for the layer \"" << ld.name
2434                    << "\" of type " << ld.type << '\n';
2435                 CV_LOG_INFO(NULL, os.str().c_str());
2436                 continue;
2437             }
2438 
2439             /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */
2440             auto context = cudaInfo->context;
2441             auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers);
2442             ld.backendNodes[DNN_BACKEND_CUDA] = node;
2443 
2444             auto cudaNode = node.dynamicCast<CUDABackendNode>();
2445             cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
2446         }
2447 
2448         if (blobsToKeep_.size() > 1)
2449         {
2450             for (const auto& pin : blobsToKeep_)
2451             {
2452                 LayerData& ld = layers[pin.lid];
2453                 ld.cudaD2HBackgroundTransfers.push_back(pin.oid);
2454             }
2455         }
2456 #endif
2457     }
2458 
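    // Recursively allocates a layer: parents first, then input blobs are bound to the
    // producers' outputs, the blob manager allocates outputs/internals (reusing memory
    // where possible), and Layer::finalize() is called on the resulting shapes.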
2459     void allocateLayer(int lid, const LayersShapesMap& layersShapes)
2460     {
2461         CV_TRACE_FUNCTION();
2462 
2463         LayerData &ld = layers[lid];
2464 
2465         //already allocated
2466         if (ld.flag)
2467             return;
2468 
2469         size_t ninputs = ld.inputBlobsId.size();
2470 #if 0
2471         printf("layer %s:", ld.name.c_str());
2472         for (size_t i = 0; i < ninputs; i++)
2473         {
2474             int inp_lid = ld.inputBlobsId[i].lid;
2475             LayerData &inp_ld = layers[inp_lid];
2476             int inp_outputs = (int)inp_ld.outputBlobs.size();
2477             std::cout << " " << inp_ld.name << "(" << inp_outputs;
2478 
2479             for( int j = 0; j < inp_outputs; j++ )
2480             {
2481                 std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
2482             }
2483             std::cout << ")";
2484         }
2485         printf("\n");
2486 #endif
2487 
2488         //determine parent layers
2489         for (size_t i = 0; i < ninputs; i++)
2490             ld.inputLayersId.insert(ld.inputBlobsId[i].lid);
2491 
2492         //allocate parents
2493         for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
2494             allocateLayer(*i, layersShapes);
2495 
2496         //bind inputs
2497         if (ld.id == 0)  // DataLayer
2498         {
2499             ninputs = netInputLayer->inputsData.size();
2500             ld.inputBlobsWrappers.resize(ninputs);
2501             for (size_t i = 0; i < ninputs; i++)
2502                 ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
2503         }
2504         else
2505         {
2506             ld.inputBlobs.resize(ninputs);
2507             ld.inputBlobsWrappers.resize(ninputs);
2508             for (size_t i = 0; i < ninputs; i++)
2509             {
2510                 LayerPin from = ld.inputBlobsId[i];
2511                 CV_Assert(from.valid());
2512                 CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
2513                 ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
2514                 ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
2515             }
2516         }
2517 
2518         LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
2519 
2520         CV_Assert(layerShapesIt != layersShapes.end());
2521 
2522         std::vector<LayerPin> pinsForInternalBlobs;
2523         blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
2524                                           preferableBackend == DNN_BACKEND_OPENCV &&
2525                                           preferableTarget == DNN_TARGET_OPENCL_FP16);
2526         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
2527         for (int i = 0; i < ld.outputBlobs.size(); ++i)
2528             ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
2529 
2530         /* CUDA backend has its own system for internal blobs; we don't need these */
2531         ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA) ? 0 : ld.internals.size());
2532         for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i)
2533             ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
2534 
2535         Ptr<Layer> layerPtr = ld.getLayerInstance();
2536         {
2537             std::vector<Mat> inps(ld.inputBlobs.size());
2538             for (int i = 0; i < ld.inputBlobs.size(); ++i)
2539             {
2540                 inps[i] = *ld.inputBlobs[i];
2541             }
2542             layerPtr->finalize(inps, ld.outputBlobs);
2543             layerPtr->preferableTarget = preferableTarget;
2544 #if 0
2545             std::cout << "\toutputs:";
2546             size_t noutputs = ld.outputBlobs.size();
2547             for (size_t j = 0; j < noutputs; j++)
2548             {
2549                 std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
2550             }
2551             std::cout << "\n";
2552 #endif
2553         }
2554 
2555         // After allocating the layer, we decrease the reference counters of its input blobs.
2556         blobManager.releaseReferences(ld.inputBlobsId);
2557         blobManager.releaseReferences(pinsForInternalBlobs);
2558 
2559         ld.flag = 1;
2560     }
2561 
2562 #if 0
2563 #define printf_(args) printf args
2564 #else
2565 #define printf_(args)
2566 #endif
2567 
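    // Fusion pass over the allocated graph. Typical chains that collapse into a single
    // node (a sketch of intent; actual support depends on backend, target and layer types):
    //
    //   Convolution -> BatchNorm -> Scale -> ReLU   ==>  Convolution (with fused ops)
    //   Convolution -> Eltwise(sum) -> ReLU         ==>  Convolution (OpenCL/CUDA only)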
2568     void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
2569     {
2570         CV_TRACE_FUNCTION();
2571 
2572         if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
2573                         preferableBackend != DNN_BACKEND_CUDA &&
2574                         preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
2575                         preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
2576            return;
2577 
2578         // Scan through all the layers. If there is a convolution layer followed by an activation layer,
2579         // we try to embed the activation into the convolution and disable separate execution of the activation.
2580         std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
2581                                       blobsToKeep_.end());
2582         MapIdToLayerData::iterator it;
2583         for (it = layers.begin(); it != layers.end(); it++)
2584         {
2585             int lid = it->first;
2586             LayerData& ld = layers[lid];
2587             if( ld.skip )
2588             {
2589                 printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2590                 continue;
2591             }
2592             printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2593 
2594             // Optimization #1: try to fuse batch norm, scaling and/or activation layers
2595             // with the current layer if they follow it. Normally, they are fused with the convolution layer,
2596             // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
2597             // some other layers.
2598             Ptr<Layer>& currLayer = ld.layerInstance;
2599             if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
2600             {
2601                 LayerData* nextData = &layers[ld.consumers[0].lid];
2602                 LayerPin lpNext(ld.consumers[0].lid, 0);
2603                 while (nextData)
2604                 {
2605                     /* we use the `tryFuse` member of the convolution layer to fuse eltwise later;
2606                      * it's not intended to be fused here, hence we stop when we encounter an eltwise layer
2607                      */
2608                     if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise")
2609                         break;
2610                     Ptr<Layer> nextLayer = nextData->layerInstance;
2611                     if (currLayer->tryFuse(nextLayer))
2612                     {
2613                         printf_(("\tfused with %s\n", nextLayer->name.c_str()));
2614                         nextData->skip = true;
2615                         ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2616                         ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2617                         if (nextData->consumers.size() == 1)
2618                         {
2619                             int nextLayerId = nextData->consumers[0].lid;
2620                             nextData = &layers[nextLayerId];
2621                             lpNext = LayerPin(nextLayerId, 0);
2622                         }
2623                         else
2624                         {
2625                             nextData = 0;
2626                             break;
2627                         }
2628                     }
2629                     else
2630                         break;
2631                 }
2632 
2633                 if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA)
2634                     continue;  // Go to the next layer.
2635 
2636                 // TODO: support more fusion styles for the OpenCL target.
2637                 if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
2638                      (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
2639                      ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
2640                      ld.layerInstance->type != "Concat")) )
2641                     continue;
2642 
2643                 if (preferableBackend == DNN_BACKEND_CUDA && IS_DNN_CUDA_TARGET(preferableTarget)
2644                     && ld.layerInstance->type != "Convolution"
2645                     && ld.layerInstance->type != "Concat")
2646                     continue;
2647 
2648                 while (nextData)
2649                 {
2650                     // For now, the OpenCL target supports fusion only with ReLU/ChannelsPReLU/ReLU6/TanH/Power activations
2651                     if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
2652                         nextData->type != "ReLU" &&
2653                         nextData->type != "ChannelsPReLU" &&
2654                         nextData->type != "ReLU6" &&
2655                         nextData->type != "TanH" &&
2656                         nextData->type != "Power")
2657                         break;
2658 
2659                     Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2660                     if (nextActivLayer.empty())
2661                         break;
2662 
2663                     if (currLayer->setActivation(nextActivLayer))
2664                     {
2665                         printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
2666                         nextData->skip = true;
2667                         ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2668                         ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2669                         if (nextData->consumers.size() == 1)
2670                         {
2671                             int nextLayerId = nextData->consumers[0].lid;
2672                             nextData = &layers[nextLayerId];
2673                             lpNext = LayerPin(nextLayerId, 0);
2674                         }
2675                         else
2676                         {
2677                             nextData = 0;
2678                             break;
2679                         }
2680                     }
2681                     else
2682                         break;
2683                 }
2684 
2685                 // OpenCL: fuse convolution layer followed by eltwise + relu
2686                 // CUDA: fuse convolution layer followed by eltwise (and optional activation)
2687                 while (nextData &&
2688                     (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) &&
2689                     ld.layerInstance->type == "Convolution"
2690                 )  // behaves like an 'if': the body always ends with an unconditional break
2691                 {
2692                     Ptr<EltwiseLayer> nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
2693                     if (nextEltwiseLayer.empty())
2694                         break;
2695 
2696 #ifdef HAVE_CUDA
2697                     // CUDA backend supports fusion with eltwise sum (without variable channels)
2698                     if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty())
2699                     {
2700                     // we create a temporary backend node for the eltwise layer to obtain the eltwise configuration
2701                         cuda4dnn::csl::CSLContext context; // assume that initCUDA and EltwiseOp do not use the context during init
2702                         const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers);
2703                         const auto eltwiseNode = node.dynamicCast<cuda4dnn::EltwiseOpBase>();
2704                         // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used.
2705                         // Hence, a successful cast to EltwiseOp implies that the number of channels is the same in all operand tensors.
2706                         if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty())
2707                             break;
2708                     }
2709 #endif
2710 
2711                     if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0)
2712                         break;
2713                     if (nextData->inputBlobsId.size() != 2)
2714                         break;
2715 
2716                     if (IS_DNN_OPENCL_TARGET(preferableTarget))
2717                     {
2718                         if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum")
2719                         {
2720                             if (nextData->params.has("coeff"))
2721                             {
2722                                 DictValue paramCoeff = nextData->params.get("coeff");
2723                                 int n = paramCoeff.size();
2724                                 bool isCoeffOneOne = (n == 2);
2725                                 for (int i = 0; isCoeffOneOne && i < n; i++)
2726                                 {
2727                                     float c = paramCoeff.get<float>(i);
2728                                     isCoeffOneOne &= (c == 1.0f);
2729                                 }
2730                                 if (!isCoeffOneOne)
2731                                 {
2732                                     CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only");
2733                                     break;
2734                                 }
2735                             }
2736                         }
2737                         else
2738                         {
2739                             CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation"));
2740                             break;
2741                         }
2742                     }
2743 
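                    // Decide which eltwise input comes from the current convolution and which one is the
                    // residual ('bias') branch, then try to fuse the eltwise (and, if possible, the following
                    // activation) into the convolution.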
2744                     {
2745                         LayerData *eltwiseData = nextData;
2746 
2747                         // The eltwise layer has two inputs. We need to determine which one
2748                         // is the base convolution layer and which one can be used as its bias.
2749                         LayerData* biasLayerData = 0;
2750                         for (int i = 0; i < 2; ++i)
2751                         {
2752                             LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
2753                             CV_Assert(downLayerData);
2754                             while (downLayerData->skip)
2755                             {
2756                                 if (downLayerData->inputBlobsId.size() == 1)
2757                                     downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
2758                                 else
2759                                 {
2760                                     downLayerData = 0;
2761                                     break;
2762                                 }
2763                             }
2764                             if (downLayerData && ld.id == downLayerData->id)
2765                             {
2766                                 biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
2767                                 break;
2768                             }
2769                         }
2770                         CV_Assert(biasLayerData);
2771                         {
2772                             // fuse eltwise + activation layer
2773                             // bias must already be computed to fuse => bias layer must appear before convolution
2774                             if (biasLayerData->id < ld.id)
2775                             {
2776                                 /* we can fuse activation if:
2777                                  * => activation layer that follows is the only consumer of eltwise output
2778                                  * => activation layer does not process multiple inputs
2779                                  * => we are not required to keep the output of eltwise
2780                                  */
2781                                 Ptr<ActivationLayer> nextFusabeleActivLayer;
2782                                 if (eltwiseData->consumers.size() == 1 && pinsToKeep.count(lpNext) == 0)
2783                                 {
2784                                     nextData = &layers[eltwiseData->consumers[0].lid];
2785                                     lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
2786                                     CV_Assert(nextData);
2787                                     if (nextData->outputBlobs.size() == 1)
2788                                         nextFusabeleActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2789                                 }
2790                                 else
2791                                 {
2792                                     // OCL backend cannot fuse in this case but the CUDA backend can continue with just eltwise
2793                                     nextData = 0;
2794                                 }
2795 
2796                                 // the requirements of OCV OpenCL backend and CUDA backend are different
2797                                 // we need to check them separately; hence, the fuse variables
2798                                 bool fuse_eltwise = false, fuse_activation = false;
2799 
2800                                 Ptr<PowerLayer> activ_power;
2801                                 if (IS_DNN_OPENCL_TARGET(preferableTarget) && !nextFusabeleActivLayer.empty() &&
2802                                     nextData &&
2803                                     (!nextData->type.compare("ReLU") ||
2804                                      !nextData->type.compare("ChannelsPReLU") ||
2805                                      (!nextData->type.compare("Power") && (activ_power = nextFusabeleActivLayer.dynamicCast<PowerLayer>()) && activ_power->scale == 1.0f)
2806                                     ) &&
2807                                     currLayer->setActivation(nextFusabeleActivLayer))
2808                                 {
2809                                     fuse_eltwise = true;
2810                                     fuse_activation = true;
2811                                 }
2812 
2813                                 if (IS_DNN_CUDA_TARGET(preferableTarget))
2814                                 {
2815                                     /* supported fusion options:
2816                                      * => convolution + eltwise
2817                                      * => activation(convolution) + eltwise
2818                                      *    > convolution + activation would have been fused already; we have to fuse eltwise
2819                                      * => activation(convolution + eltwise)
2820                                      *    > fuse eltwise and then activation
2821                                      */
2822                                     auto layer = nextEltwiseLayer.staticCast<Layer>();
2823                                     if (currLayer->tryFuse(layer))
2824                                     {
2825                                         fuse_eltwise = true; /* eltwise was successfully fused */
2826                                         if (!nextFusabeleActivLayer.empty() && nextData)
2827                                         {
2828                                             if ((!nextData->type.compare("ReLU") ||
2829                                                  !nextData->type.compare("ReLU6") ||
2830                                                  !nextData->type.compare("Power") ||
2831                                                  !nextData->type.compare("TanH") ||
2832                                                  !nextData->type.compare("Sigmoid") ||
2833                                                  !nextData->type.compare("Swish") ||
2834                                                  !nextData->type.compare("Mish")) &&
2835                                                 currLayer->setActivation(nextFusabeleActivLayer))
2836                                             {
2837                                                 // activation was fused
2838                                                 fuse_activation = true;
2839                                             }
2840                                         }
2841                                     }
2842                                 }
2843 
2844                                 CV_Assert(!fuse_activation || fuse_eltwise); /* cannot fuse activation without eltwise */
2845                                 if(fuse_eltwise && fuse_activation)
2846                                 {
2847                                     CV_Assert(nextData);
2848                                     CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
2849                                     ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
2850                                     printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
2851                                     printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str()));
2852                                     eltwiseData->skip = true;
2853                                     nextData->skip = true;
2854                                     // This optimization is for cases like
2855                                     // some_layer   conv
2856                                     //   |             |
2857                                     //   +-- eltwise --+
2858                                     //          |
2859                                     //        activ
2860                                     // This way all the element-wise computations
2861                                     // (i.e. some_layer+conv or some_layer*conv)
2862                                     // would be done at the [conv] layer. So we need to
2863                                     // replace [conv]'s output blob with [eltwise]'s one,
2864                                     // considering that [activ] is an in-place layer.
2865                                     // Also we need to move all the consumers' references.
2866                                     // To prevent memory collisions (i.e. when input of
2867                                     // [conv] and output of [eltwise] is the same blob)
2868                                     // we allocate a new blob.
2869                                     CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
2870                                     ld.outputBlobs[0] = ld.outputBlobs[0].clone();
2871                                     ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
2872 
2873                                     eltwiseData->outputBlobs = ld.outputBlobs;
2874                                     nextData->outputBlobs = ld.outputBlobs;
2875                                     eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
2876                                     nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
2877 
2878                                     // Move references of [activ] layer consumers to the newly allocated blob.
2879                                     for (int i = 0; i < nextData->consumers.size(); ++i)
2880                                     {
2881                                         LayerData& consumer = layers[nextData->consumers[i].lid];
2882                                         for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
2883                                         {
2884                                             if (consumer.inputBlobsId[j].lid == lpNext.lid)
2885                                             {
2886                                                 consumer.inputBlobs[j] = &ld.outputBlobs[0];
2887                                                 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
2888                                                 break;
2889                                             }
2890                                         }
2891                                     }
2892                                 }
2893                                 else if (fuse_eltwise) // conv + eltwise (note: conv could have fused activations before eltwise)
2894                                 {
2895                                     CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
2896                                     CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
2897                                     ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
2898                                     printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
2899                                     eltwiseData->skip = true;
2900                                     // This optimization is for cases like
2901                                     // some_layer   conv (maybe fused with activ)
2902                                     //   |             |
2903                                     //   +-- eltwise --+
2904                                     //
2905                                     // This way all the element-wise computations
2906                                     // (i.e. some_layer+conv or some_layer*conv)
2907                                     // would be done at the [conv] layer. So we need to
2908                                     // replace [conv]'s output blob with [eltwise]'s one.
2909                                     // Also we need to move all the consumers' references.
2910                                     // To prevent memory collisions (i.e. when input of
2911                                     // [conv] and output of [eltwise] is the same blob)
2912                                     // we allocate a new blob.
2913                                     CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
2914                                     ld.outputBlobs[0] = ld.outputBlobs[0].clone();
2915                                     ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
2916 
2917                                     eltwiseData->outputBlobs = ld.outputBlobs;
2918                                     eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
2919 
2920                                     // Move references of [eltwise] layer consumers to the newly allocated blob.
2921                                     for (int i = 0; i < eltwiseData->consumers.size(); ++i)
2922                                     {
2923                                         LayerData& consumer = layers[eltwiseData->consumers[i].lid];
2924                                         for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
2925                                         {
2926                                             if (consumer.inputBlobsId[j].lid == eltwiseData->id)
2927                                             {
2928                                                 consumer.inputBlobs[j] = &ld.outputBlobs[0];
2929                                                 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
2930                                                 break;
2931                                             }
2932                                         }
2933                                     }
2934                                 }
2935                             }
2936                         }
2937                     }
2938 
2939                     break;
2940                 }
2941             }
2942 
2943             if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA)
2944                 continue;  // Go to the next layer.
2945 
2946             // optimization #2: if there is a concat layer that concatenates channels
2947             // from the inputs together (i.e. axis == 1), then we make the inputs of
2948             // the concat layer write directly into the concatenation output buffer
2949             // (and so we eliminate the concatenation layer, because the channels
2950             // are concatenated implicitly).
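            // Sketch of the rewrite performed below (axis == 1, i.e. channel concatenation;
            // conv1/conv2 stand for arbitrary producer layers):
            //   conv1 --+                                conv1 -> output[:, 0:c1, ...]
            //           +--> concat --> output   ===>    conv2 -> output[:, c1:c1+c2, ...]
            //   conv2 --+                                (the Concat layer itself is skipped)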
2951             Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
2952             if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 )
2953             {
2954                 Mat& output = ld.outputBlobs[0];
2955                 UMat umat_output;
2956 #ifdef HAVE_OPENCL
2957                 if (!ld.outputBlobsWrappers.empty() &&
2958                     (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
2959                 {
2960                     size_t i, ninputs = ld.inputBlobsId.size();
2961                     bool conv_layer = true;
2962                     for( i = 0; i < ninputs; i++ )
2963                     {
2964                         LayerPin pin = ld.inputBlobsId[i];
2965                         LayerData* inp_i_data = &layers[pin.lid];
2966                         while(inp_i_data->skip &&
2967                               inp_i_data->inputBlobsId.size() == 1 &&
2968                               inp_i_data->consumers.size() == 1)
2969                         {
2970                             pin = inp_i_data->inputBlobsId[0];
2971                             inp_i_data = &layers[pin.lid];
2972                         }
2973                         conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
2974                     }
2975                     if (!conv_layer)
2976                         continue;
2977                     std::vector<UMat> umat_outputBlobs;
2978                     umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2979                     umat_output = umat_outputBlobs[0];
2980                 }
2981 #endif
2982 
2983                 // TODO: in general, this optimization can always be done, but
2984                 // many layers currently check that the input/output blobs are
2985                 // continuous arrays. Unfortunately, this is not true when
2986                 // the concatenation optimization is applied with batch_size > 1.
2987                 // So, for now, we only apply this optimization in the most popular
2988                 // case batch_size == 1.
2989                 int axis = normalize_axis(concatLayer->axis, output.dims);
2990                 if( output.total(0, axis) == 1 )
2991                 {
2992                     size_t i, ninputs = ld.inputBlobsId.size();
2993                     std::vector<LayerPin> realinputs(ninputs);
2994                     for( i = 0; i < ninputs; i++ )
2995                     {
2996                         LayerPin pin = ld.inputBlobsId[i];
2997                         LayerData* inp_i_data = &layers[pin.lid];
2998                         while(inp_i_data->skip &&
2999                               inp_i_data->inputBlobsId.size() == 1 &&
3000                               inp_i_data->consumers.size() == 1)
3001                         {
3002                             pin = inp_i_data->inputBlobsId[0];
3003                             inp_i_data = &layers[pin.lid];
3004                         }
3005                         printf_(("\treal input for %s is %s\n",
3006                                layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
3007                                inp_i_data->getLayerInstance()->name.c_str()));
3008 
3009                         if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
3010                             break;
3011 #ifdef HAVE_CUDA
3012                         if (preferableBackend == DNN_BACKEND_CUDA &&
3013                             (inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false ||
3014                              (inp_i_data->layerInstance->type != "Convolution" &&
3015                               inp_i_data->layerInstance->type != "Pooling" &&
3016                               inp_i_data->layerInstance->type != "Resize"  &&
3017                               inp_i_data->layerInstance->type != "Flatten" &&
3018                               inp_i_data->layerInstance->type != "Permute" &&
3019                               inp_i_data->layerInstance->type != "Reorg" &&
3020                               inp_i_data->layerInstance->type != "Eltwise" &&
3021                               inp_i_data->layerInstance.dynamicCast<ActivationLayer>().empty())))
3022                         {
3023                             break;
3024                         }
3025 #endif
3026                         realinputs[i] = pin;
3027                     }
3028 
3029                     if( i >= ninputs )
3030                     {
3031                         // Allocate new memory to prevent collisions during memory
3032                         // reusing (see https://github.com/opencv/opencv/pull/10456).
3033                         output = output.clone();
3034 #ifdef HAVE_OPENCL
3035                         if (preferableBackend == DNN_BACKEND_OPENCV &&
3036                             IS_DNN_OPENCL_TARGET(preferableTarget))
3037                         {
3038                             std::vector<UMat> umats(1);
3039                             umat_output = umat_output.clone();
3040                             umats[0] = umat_output;
3041                             OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
3042                         }
3043 #endif
3044 
3045 #ifdef HAVE_CUDA
3046                         if (preferableBackend == DNN_BACKEND_CUDA)
3047                             ld.outputBlobsWrappers[0] = wrap(output);
3048 #endif
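                        // Re-point each real input's output blob (and backend wrapper) to a contiguous
                        // slice of the concat output along 'axis'.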
3049                         std::vector<Range> chrange(output.dims, Range::all());
3050                         int ofs = 0;
3051                         for( i = 0; i < ninputs; i++ )
3052                         {
3053                             LayerPin pin = realinputs[i];
3054                             LayerData* inp_i_data = &layers[pin.lid];
3055                             int channels_i = ld.inputBlobs[i]->size[axis];
3056                             chrange[axis] = Range(ofs, ofs + channels_i);
3057                             printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
3058                                    pin.oid, ofs, ofs + channels_i));
3059                             ofs += channels_i;
3060                             Mat output_slice = output(chrange);
3061                             Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
3062                             CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
3063                             Mat* oldPtr = &curr_output;
3064                             curr_output = output_slice;
3065 #ifdef HAVE_OPENCL
3066                             if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
3067                             {
3068                                 std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
3069                                 umats[pin.oid] = umat_output(chrange);
3070                                 OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
3071                             }
3072 #endif
3073 #ifdef HAVE_CUDA
3074                             if (preferableBackend == DNN_BACKEND_CUDA)
3075                             {
3076                                 auto cuda_wrapper = wrap(output).dynamicCast<CUDABackendWrapper>();
3077                                 auto offset = chrange[axis].start * output_slice.total(axis + 1, output.dims);
3078                                 auto new_shape = shape(output_slice);
3079                                 cuda_wrapper->update(new_shape, offset);
3080                                 inp_i_data->outputBlobsWrappers[pin.oid] = cuda_wrapper.staticCast<BackendWrapper>();
3081                             }
3082 #endif
3083                             // Layers that referred to the old input Mat will now see the
3084                             // new data through the same Mat object.
3085                             CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
3086                         }
3087 
3088 #ifdef HAVE_CUDA
3089                         if (preferableBackend == DNN_BACKEND_CUDA)
3090                         {
3091                             for (int i = 0; i < ld.consumers.size(); i++)
3092                             {
3093                                 LayerData& consumer = layers[ld.consumers[i].lid];
3094                                 for (int j = 0; j < consumer.inputBlobsId.size(); j++)
3095                                 {
3096                                     if (consumer.inputBlobsId[j].lid == ld.id)
3097                                     {
3098                                         CV_Assert(consumer.inputBlobs[j]->data == ld.outputBlobs[0].data);
3099                                         consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
3100                                         break;
3101                                     }
3102                                 }
3103                             }
3104                         }
3105 #endif
3106                         ld.skip = true;
3107                         printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
3108                     }
3109                 }
3110             }
3111         }
3112     }
3113 
3114     void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
3115     {
3116         CV_TRACE_FUNCTION();
3117 
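        // Allocation pipeline: reset per-layer flags, infer shapes for every layer from the network
        // inputs, reset the blob manager, register blob references (network inputs, layer inputs and
        // blobs the caller asked to keep), allocate each layer, and finally run fuseLayers().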
3118         MapIdToLayerData::iterator it;
3119         for (it = layers.begin(); it != layers.end(); it++)
3120             it->second.flag = 0;
3121 
3122         CV_Assert(!layers[0].outputBlobs.empty());
3123         ShapesVec inputShapes;
3124         for(int i = 0; i < layers[0].outputBlobs.size(); i++)
3125         {
3126             Mat& inp = layers[0].outputBlobs[i];
3127             CV_Assert(inp.total());
3128             if (preferableBackend == DNN_BACKEND_OPENCV &&
3129                 preferableTarget == DNN_TARGET_OPENCL_FP16)
3130             {
3131                 layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
3132             }
3133             inputShapes.push_back(shape(inp));
3134         }
3135         LayersShapesMap layersShapes;
3136         getLayersShapes(inputShapes, layersShapes);
3137 
3138         blobManager.reset();
3139         backendWrappers.clear();
3140 
3141         for(auto& layer : layers)
3142         {
3143             auto& ld = layer.second;
3144             ld.inputBlobsWrappers.clear();
3145             ld.outputBlobsWrappers.clear();
3146             ld.internalBlobsWrappers.clear();
3147         }
3148 
3149         // Fake references to input blobs.
3150         for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
3151             blobManager.addReference(LayerPin(0, i));
3152         for (it = layers.begin(); it != layers.end(); ++it)
3153         {
3154             const LayerData& ld = it->second;
3155             blobManager.addReferences(ld.inputBlobsId);
3156         }
3157 
3158         for (int i = 0; i < blobsToKeep_.size(); i++)
3159         {
3160             blobManager.addReference(blobsToKeep_[i]);
3161         }
3162 
3163         for (it = layers.begin(); it != layers.end(); it++)
3164         {
3165             int lid = it->first;
3166             allocateLayer(lid, layersShapes);
3167         }
3168 
3169         layersTimings.resize(lastLayerId + 1, 0);
3170         fuseLayers(blobsToKeep_);
3171     }
3172 
3173     void forwardLayer(LayerData &ld)
3174     {
3175         CV_TRACE_FUNCTION();
3176 
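        // Skipped (fused) layers only get a zero timing entry. Otherwise the layer runs either through
        // the default OpenCV implementation (CPU/OpenCL) or through its backend-specific node.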
3177         Ptr<Layer> layer = ld.layerInstance;
3178 
3179         if( !ld.skip )
3180         {
3181             TickMeter tm;
3182             tm.start();
3183 
3184             std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
3185             if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
3186             {
3187                 if (isAsync)
3188                     CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode");
3189 
3190                 if (!layer->supportBackend(DNN_BACKEND_OPENCV))
3191                     CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" unsupported on OpenCV backend",
3192                                                        ld.name.c_str(), ld.type.c_str()));
3193 
3194 #ifdef HAVE_OPENCL
3195                 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
3196                 {
3197                     std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
3198                     std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
3199                     std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
3200                     layer->forward(umat_inputBlobs,
3201                                    umat_outputBlobs,
3202                                    umat_internalBlobs);
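                    // Optional NaN/Inf diagnostics on the produced outputs (controlled by the
                    // DNN_CHECK_NAN_INF* flags); inputs/outputs/internals are dumped on failure.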
3203                     if (DNN_CHECK_NAN_INF)
3204                     {
3205                         bool fail = false;
3206                         for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
3207                         {
3208                             UMat& u = umat_outputBlobs[i];
3209                             Mat m;
3210                             if (u.depth() == CV_16S) // FP16
3211                                 convertFp16(u, m);
3212                             else
3213                                 m = u.getMat(ACCESS_READ);
3214                             if (!checkRange(m))
3215                             {
3216                                 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3217                                 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3218                                 fail = true;
3219                             }
3220                             else if (!checkRange(m, true, NULL, -1e6, 1e6))
3221                             {
3222                                 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3223                                 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3224                                 fail = true;
3225                             }
3226                         }
3227                         if (fail)
3228                         {
3229                             for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
3230                             {
3231                                 UMat& u = umat_inputBlobs[i];
3232                                 Mat m;
3233                                 if (u.depth() == CV_16S) // FP16
3234                                     convertFp16(u, m);
3235                                 else
3236                                     m = u.getMat(ACCESS_READ);
3237                                 std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
3238                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3239                             }
3240                             for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
3241                             {
3242                                 UMat& u = umat_outputBlobs[i];
3243                                 Mat m;
3244                                 if (u.depth() == CV_16S) // FP16
3245                                     convertFp16(u, m);
3246                                 else
3247                                     m = u.getMat(ACCESS_READ);
3248                                 std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
3249                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3250                             }
3251                             for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
3252                             {
3253                                 UMat& u = umat_internalBlobs[i];
3254                                 Mat m;
3255                                 if (u.depth() == CV_16S) // FP16
3256                                     convertFp16(u, m);
3257                                 else
3258                                     m = u.getMat(ACCESS_READ);
3259                                 std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
3260                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
3261                             }
3262                             if (DNN_CHECK_NAN_INF_RAISE_ERROR)
3263                                 CV_Assert(!fail);
3264                         }
3265                     }
3266                     OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
3267                 }
3268                 else
3269 #endif
3270                 {
3271                     for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
3272                     {
3273                         if (!ld.inputBlobsWrappers[i].empty())
3274                             ld.inputBlobsWrappers[i]->copyToHost();
3275                     }
3276 
3277                     std::vector<Mat> inps(ld.inputBlobs.size());
3278                     for (int i = 0; i < ld.inputBlobs.size(); ++i)
3279                     {
3280                         inps[i] = *ld.inputBlobs[i];
3281                     }
3282                     layer->forward(inps, ld.outputBlobs, ld.internals);
3283 
3284                     if (DNN_CHECK_NAN_INF)
3285                     {
3286                         bool fail = false;
3287                         for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
3288                         {
3289                             const Mat& m = ld.outputBlobs[i];
3290                             if (!checkRange(m))
3291                             {
3292                                 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3293                                 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3294                                 fail = true;
3295                             }
3296                             else if (!checkRange(m, true, NULL, -1e6, 1e6))
3297                             {
3298                                 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3299                                 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3300                                 fail = true;
3301                             }
3302                         }
3303                         if (fail)
3304                         {
3305                             for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
3306                             {
3307                                 const Mat* pM = ld.inputBlobs[i];
3308                                 if (!pM)
3309                                 {
3310                                     std::cout << "INPUT " << i << " is NULL" << std::endl;
3311                                     continue;
3312                                 }
3313                                 const Mat& m = *pM;
3314                                 std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
3315                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3316                             }
3317                             for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
3318                             {
3319                                 const Mat& m = ld.outputBlobs[i];
3320                                 std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
3321                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3322                             }
3323                             for (size_t i = 0; i < ld.internals.size(); ++i)
3324                             {
3325                                 const Mat& m = ld.internals[i];
3326                                 std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
3327                                 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3328                             }
3329                             if (DNN_CHECK_NAN_INF_RAISE_ERROR)
3330                                 CV_Assert(!fail);
3331                         }
3332                     }
3333 
3334                     for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
3335                     {
3336                         if (!ld.outputBlobsWrappers[i].empty())
3337                             ld.outputBlobsWrappers[i]->setHostDirty();
3338                     }
3339                 }
3340             }
3341             else
3342             {
3343                 Ptr<BackendNode> node = it->second;
3344                 CV_Assert(!node.empty());
3345                 if (preferableBackend == DNN_BACKEND_CUDA)
3346                 {
3347                     CV_Assert(haveCUDA());
3348 
3349 #ifdef HAVE_CUDA
3350                     Ptr<CUDABackendNode> cudaNode = node.dynamicCast<CUDABackendNode>();
3351                     CV_Assert(!cudaNode.empty());
3352 
3353                     cudaNode->forward(ld.inputBlobsWrappers, ld.outputBlobsWrappers, cudaInfo->workspace);
3354 
3355                     for (auto id : ld.cudaD2HBackgroundTransfers)
3356                     {
3357                         auto wrapper = ld.outputBlobsWrappers[id].dynamicCast<CUDABackendWrapper>();
3358                         wrapper->copyToHostInBackground();
3359                     }
3360 #endif
3361                 }
3362                 else if (preferableBackend == DNN_BACKEND_HALIDE)
3363                 {
3364                     forwardHalide(ld.outputBlobsWrappers, node);
3365                 }
3366                 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
3367                 {
3368                     forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
3369                 }
3370                 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
3371                 {
3372                     forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
3373                 }
3374                 else if (preferableBackend == DNN_BACKEND_VKCOM)
3375                 {
3376                     try
3377                     {
3378                         forwardVkCom(ld.outputBlobsWrappers, node);
3379                     }
3380                     catch (const cv::Exception& e)
3381                     {
3382                         CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what());
3383                         it->second = Ptr<BackendNode>();
3384                         forwardLayer(ld);
3385                     }
3386                 }
3387                 else
3388                 {
3389                     CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
3390                 }
3391             }
3392 
3393             tm.stop();
3394             int64 t = tm.getTimeTicks();
3395             layersTimings[ld.id] = (t > 0) ? t : t + 1;  // zero for skipped layers only
3396         }
3397         else
3398         {
3399             layersTimings[ld.id] = 0;
3400         }
3401 
3402         ld.flag = 1;
3403     }
3404 
3405     void forwardToLayer(LayerData &ld, bool clearFlags = true)
3406     {
3407         CV_TRACE_FUNCTION();
3408 
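        // Forward every not-yet-forwarded layer with a smaller id first (parents come before the target
        // in the map), then the target layer itself; for the CUDA backend, synchronize the stream afterwards.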
3409         if (clearFlags)
3410         {
3411             MapIdToLayerData::iterator it;
3412             for (it = layers.begin(); it != layers.end(); it++)
3413                 it->second.flag = 0;
3414         }
3415 
3416         // already forwarded
3417         if (ld.flag)
3418             return;
3419 
3420         //forward parents
3421         MapIdToLayerData::iterator it;
3422         for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
3423         {
3424             LayerData &ld = it->second;
3425             if (ld.flag)
3426                 continue;
3427             forwardLayer(ld);
3428         }
3429 
3430         //forward itself
3431         forwardLayer(ld);
3432 
3433 #ifdef HAVE_CUDA
3434         if (preferableBackend == DNN_BACKEND_CUDA)
3435             cudaInfo->context.stream.synchronize();
3436 #endif
3437     }
3438 
3439     void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
3440     {
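        // Shapes are resolved recursively: missing input shapes are pulled from the producing layers,
        // then the layer's getMemoryShapes() computes the output and internal shapes.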
3441         std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
3442 
3443         if (id == 0 && inOutShapes[id].in[0].empty())
3444         {
3445             if (!layers[0].outputBlobs.empty())
3446             {
3447                 ShapesVec shapes;
3448                 for (int i = 0; i < layers[0].outputBlobs.size(); i++)
3449                 {
3450                     Mat& inp = layers[0].outputBlobs[i];
3451                     CV_Assert(inp.total());
3452                     shapes.push_back(shape(inp));
3453                 }
3454                 inOutShapes[0].in = shapes;
3455             }
3456             else
3457             {
3458                 const std::vector<MatShape>& inputShapes = netInputLayer->shapes;
3459                 bool none = true;
3460                 for (size_t i = 0; i < inputShapes.size(); i++)
3461                 {
3462                     if (!inputShapes[i].empty())
3463                     {
3464                         none = false;
3465                         break;
3466                     }
3467                 }
3468                 if (none)
3469                 {
3470                     inOutShapes[0].out.clear();
3471                     return;
3472                 }
3473                 else
3474                 {
3475                     inOutShapes[0].in = inputShapes;
3476                 }
3477             }
3478         }
3479 
3480         if (inOutShapes[id].in.empty())
3481         {
3482             for(int i = 0; i < inputLayerIds.size(); i++)
3483             {
3484                 int layerId = inputLayerIds[i].lid;
3485                 LayersShapesMap::iterator it =
3486                         inOutShapes.find(layerId);
3487                 if(it == inOutShapes.end() ||
3488                         it->second.out.empty())
3489                 {
3490                     getLayerShapesRecursively(layerId, inOutShapes);
3491                 }
3492                 const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
3493                 inOutShapes[id].in.push_back(shape);
3494             }
3495         }
3496         const ShapesVec& is = inOutShapes[id].in;
3497         ShapesVec& os = inOutShapes[id].out;
3498         ShapesVec& ints = inOutShapes[id].internal;
3499         int requiredOutputs = layers[id].requiredOutputs.size();
3500         Ptr<Layer> l = layers[id].getLayerInstance();
3501         CV_Assert(l);
3502         bool layerSupportInPlace = false;
3503         try
3504         {
3505             layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints);
3506         }
3507         catch (const cv::Exception& e)
3508         {
3509             CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." <<
3510                     " inputs=" << is.size() <<
3511                     " outputs=" << os.size() << "/" << requiredOutputs <<
3512                     " blobs=" << l->blobs.size());
3513             for (size_t i = 0; i < is.size(); ++i)
3514             {
3515                 CV_LOG_ERROR(NULL, "    input[" << i << "] = " << toString(is[i]));
3516             }
3517             for (size_t i = 0; i < os.size(); ++i)
3518             {
3519                 CV_LOG_ERROR(NULL, "    output[" << i << "] = " << toString(os[i]));
3520             }
3521             for (size_t i = 0; i < l->blobs.size(); ++i)
3522             {
3523                 CV_LOG_ERROR(NULL, "    blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
3524             }
3525             CV_LOG_ERROR(NULL, "Exception message: " << e.what());
3526             throw;
3527         }
3528         inOutShapes[id].supportInPlace = layerSupportInPlace;
3529 
3530         for (int i = 0; i < ints.size(); i++)
3531             CV_Assert(total(ints[i]) > 0);
3532 
3533         for (int i = 0; i < os.size(); i++)
3534             CV_Assert(total(os[i]) > 0);
3535     }
3536 
3537     void getLayersShapes(const ShapesVec& netInputShapes,
3538                          LayersShapesMap& inOutShapes)
3539     {
3540         inOutShapes.clear();
3541 
3542         inOutShapes[0].in = netInputShapes; //insert shape for first input layer
3543         for (MapIdToLayerData::iterator it = layers.begin();
3544              it != layers.end(); it++)
3545         {
3546             getLayerShapesRecursively(it->first, inOutShapes);
3547         }
3548     }
3549 
3550     void getLayerShapes(const ShapesVec& netInputShapes,
3551                         const int layerId,
3552                         LayerShapes& shapes)
3553     {
3554         LayersShapesMap inOutShapes;
3555         inOutShapes[0].in = netInputShapes; //insert shape for first input layer
3556         getLayerShapesRecursively(layerId, inOutShapes);
3557         shapes = inOutShapes[layerId];
3558     }
3559 
3560     void updateLayersShapes()
3561     {
3562         CV_Assert(!layers[0].outputBlobs.empty());
3563         ShapesVec inputShapes;
3564         for(int i = 0; i < layers[0].outputBlobs.size(); i++)
3565         {
3566             Mat& inp = layers[0].outputBlobs[i];
3567             CV_Assert(inp.total());
3568             if (preferableBackend == DNN_BACKEND_OPENCV &&
3569                 preferableTarget == DNN_TARGET_OPENCL_FP16)
3570             {
3571                 layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
3572             }
3573             inputShapes.push_back(shape(inp));
3574         }
3575         LayersShapesMap layersShapes;
3576         layersShapes[0].in = inputShapes;
3577         for (MapIdToLayerData::iterator it = layers.begin();
3578              it != layers.end(); it++)
3579         {
3580             int layerId = it->first;
3581             std::vector<LayerPin>& inputLayerIds = it->second.inputBlobsId;
3582             if (layersShapes[layerId].in.empty())
3583             {
3584                 for(int i = 0; i < inputLayerIds.size(); i++)
3585                 {
3586                     int inputLayerId = inputLayerIds[i].lid;
3587                     LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId);
3588                     if(inputIt == layersShapes.end() || inputIt->second.out.empty())
3589                     {
3590                         getLayerShapesRecursively(inputLayerId, layersShapes);
3591                     }
3592                     const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid];
3593                     layersShapes[layerId].in.push_back(shape);
3594                 }
3595                 it->second.layerInstance->updateMemoryShapes(layersShapes[layerId].in);
3596             }
3597         }
3598     }
3599 
3600     LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
3601     {
3602         return *std::max_element(pins.begin(), pins.end());
3603     }
3604 
3605     Mat getBlob(const LayerPin& pin)
3606     {
3607         CV_TRACE_FUNCTION();
3608 
3609         if (!pin.valid())
3610             CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3611 
3612         LayerData &ld = layers[pin.lid];
3613         if ((size_t)pin.oid >= ld.outputBlobs.size())
3614         {
3615             CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, "
3616                                            "the #%d was requested", ld.name.c_str(),
3617                                            ld.outputBlobs.size(), pin.oid));
3618         }
3619         if (preferableTarget != DNN_TARGET_CPU)
3620         {
3621             CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3622             // Transfer data to CPU if required.
3623             ld.outputBlobsWrappers[pin.oid]->copyToHost();
3624         }
3625 
3626         if (ld.outputBlobs[pin.oid].depth() == CV_16S)
3627         {
3628             convertFp16(ld.outputBlobs[pin.oid], output_blob);
3629             return output_blob;
3630         }
3631         else
3632             return ld.outputBlobs[pin.oid];
3633     }
3634 
3635     Mat getBlob(String outputName)
3636     {
3637         return getBlob(getPinByAlias(outputName));
3638     }
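    // A typical user-level call chain that ends up in getBlob() above (illustrative sketch only;
    // the model file and output name are hypothetical, not taken from this file):
    //   cv::dnn::Net net = cv::dnn::readNet("model.onnx");
    //   net.setInput(blob);
    //   cv::Mat prob = net.forward("prob");  // Net::forward() eventually calls Impl::getBlob()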
3639 
3640 #ifdef CV_CXX11
3641     AsyncArray getBlobAsync(const LayerPin& pin)
3642     {
3643         CV_TRACE_FUNCTION();
3644 #ifdef HAVE_INF_ENGINE
3645         if (!pin.valid())
3646             CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3647 
3648         LayerData &ld = layers[pin.lid];
3649         if ((size_t)pin.oid >= ld.outputBlobs.size())
3650         {
3651             CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
3652                                            "the #%d was requested", ld.name.c_str(),
3653                                            (int)ld.outputBlobs.size(), (int)pin.oid));
3654         }
3655         if (preferableTarget != DNN_TARGET_CPU)
3656         {
3657             CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3658             // Transfer data to CPU if required.
3659             ld.outputBlobsWrappers[pin.oid]->copyToHost();
3660         }
3661         CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
3662 
3663         if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) {
3664 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3665             Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
3666             return std::move(wrapper->futureMat);
3667 #else
3668             CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3669 #endif
3670         }
3671         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
3672         {
3673 #ifdef HAVE_DNN_NGRAPH
3674             Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
3675             return std::move(wrapper->futureMat);
3676 #else
3677             CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
3678 #endif
3679         }
3680 #endif  // HAVE_INF_ENGINE
3681         CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 backend is required");
3682     }
3683 
3684     AsyncArray getBlobAsync(String outputName)
3685     {
3686         return getBlobAsync(getPinByAlias(outputName));
3687     }
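    // Asynchronous outputs are available only with the Inference Engine backends; a minimal usage
    // sketch (illustrative only):
    //   net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
    //   net.setInput(blob);
    //   cv::AsyncArray out = net.forwardAsync();  // resolves through Impl::getBlobAsync()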
3688 #endif  // CV_CXX11
3689 
3690 #ifdef HAVE_INF_ENGINE
3691     static
3692     Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
3693 #endif
3694 
3695     string dump();
3696 
3697     void dumpNetworkToFile()
3698     {
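        // Writes the network structure in Graphviz .dot format to '<dump-base>.dot'; on failure the
        // exception text is written to a companion '.error' file instead of being rethrown.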
3699 #ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
3700         string dumpFileNameBase = getDumpFileNameBase();
3701         string dumpFileName = dumpFileNameBase + ".dot";
3702         try
3703         {
3704             string dumpStr = dump();
3705             std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary);
3706             out << dumpStr;
3707         }
3708         catch (const std::exception& e)
3709         {
3710             std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3711             out << "Exception: " << e.what() << std::endl;
3712         }
3713         catch (...)
3714         {
3715             std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3716             out << "Can't dump: unknown exception" << std::endl;
3717         }
3718 #endif
3719     }
3720 };
3721 
3722 Net::Net() : impl(new Net::Impl)
3723 {
3724 }
3725 
3726 #ifdef HAVE_INF_ENGINE
3727 /*static*/
3728 Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
3729 {
3730     CV_TRACE_FUNCTION();
3731 
3732     CV_TRACE_REGION("register_inputs");
3733 
3734     std::vector<String> inputsNames;
3735     std::vector<MatShape> inp_shapes;
3736     for (auto& it : ieNet.getInputsInfo())
3737     {
3738         inputsNames.push_back(it.first);
3739         std::vector<size_t> dims = it.second->getTensorDesc().getDims();
3740         inp_shapes.push_back(std::vector<int>(dims.begin(), dims.end()));
3741     }
3742 
3743     Net cvNet;
3744     cvNet.setInputsNames(inputsNames);
3745 
3746     // set empty input to determine input shapes
3747     for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id)
3748     {
3749         cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]);
3750     }
3751 
3752     CV_TRACE_REGION_NEXT("backendNode");
3753 
3754     Ptr<BackendNode> backendNode;
3755 #ifdef HAVE_DNN_NGRAPH
3756     if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3757     {
3758         auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{});
3759         Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
3760         backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
3761         backendNode = backendNodeNGraph;
3762     }
3763     else
3764 #endif
3765     {
3766 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3767         Ptr<InfEngineBackendNode> backendNodeNN(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
3768         backendNodeNN->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
3769         backendNode = backendNodeNN;
3770 #else
3771         CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3772 #endif
3773     }
3774 
3775     CV_TRACE_REGION_NEXT("register_outputs");
3776 
3777 #ifdef HAVE_DNN_NGRAPH
3778     auto ngraphFunction = ieNet.getFunction();
3779 #if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
3780     std::list< std::shared_ptr<ngraph::Node> > ngraphOperations;
3781 #else
3782     std::vector< std::shared_ptr<ngraph::Node> > ngraphOperations;
3783 #endif
3784     if (ngraphFunction)
3785     {
3786         ngraphOperations = ngraphFunction->get_ops();
3787     }
3788 #endif
3789 
3790     for (auto& it : ieNet.getOutputsInfo())
3791     {
3792         CV_TRACE_REGION("output");
3793         const auto& outputName = it.first;
3794 
3795         LayerParams lp;
3796         int lid = cvNet.addLayer(it.first, "", lp);
3797 
3798         LayerData& ld = cvNet.impl->layers[lid];
3799 
3800 #ifdef HAVE_DNN_NGRAPH
3801         if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3802         {
3803             Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
3804             cvLayer->name = outputName;
3805             cvLayer->type = "_unknown_";
3806 
3807             auto process_layer = [&](const std::string& name) -> bool
3808             {
3809                 if (ngraphFunction)
3810                 {
3811                     CV_TRACE_REGION("ngraph_function");
3812                     for (const auto& op : ngraphOperations)
3813                     {
3814                         CV_Assert(op);
3815                         if (op->get_friendly_name() == name)
3816                         {
3817                             const std::string typeName = op->get_type_info().name;
3818                             cvLayer->type = typeName;
3819                             return true;
3820                         }
3821                     }
3822                     return false;
3823                 }
3824                 else
3825                 {
3826 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
3827                     CV_Error(Error::StsNotImplemented, "This OpenCV version is built with Inference Engine which has dropped IR v7 support");
3828 #else
3829                     CV_TRACE_REGION("legacy_cnn_layer");
3830                     try
3831                     {
3832                         InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(name.c_str());
3833                         CV_Assert(ieLayer);
3834 
3835                         cvLayer->type = ieLayer->type;
3836                         return true;
3837                     }
3838                     catch (const std::exception& e)
3839                     {
3840                         CV_UNUSED(e);
3841                         CV_LOG_DEBUG(NULL, "IE layer extraction failure: '" << name << "' - " << e.what());
3842                         return false;
3843                     }
3844 #endif
3845 
3846                 }
3847             };
3848 
3849             bool found = process_layer(outputName);
3850             if (!found)
3851             {
3852                 auto pos = outputName.rfind('.');  // cut port number: ".0"
3853                 if (pos != std::string::npos)
3854                 {
3855                     std::string layerName = outputName.substr(0, pos);
3856                     found = process_layer(layerName);
3857                 }
3858             }
3859             if (!found)
3860                 CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'");
3861 
3862             ld.layerInstance = cvLayer;
3863             ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode;
3864         }
3865         else
3866 #endif
3867         {
3868 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3869             Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
3870 
3871             InferenceEngine::CNNLayerPtr ieLayer;
3872             try
3873             {
3874                 ieLayer = ieNet.getLayerByName(outputName.c_str());
3875             }
3876             catch (...)
3877             {
3878                 auto pos = outputName.rfind('.');  // cut port number: ".0"
3879                 if (pos != std::string::npos)
3880                 {
3881                     std::string layerName = outputName.substr(0, pos);
3882                     ieLayer = ieNet.getLayerByName(layerName.c_str());
3883                 }
3884             }
3885             CV_Assert(ieLayer);
3886 
3887             cvLayer->name = outputName;
3888             cvLayer->type = ieLayer->type;
3889             ld.layerInstance = cvLayer;
3890 
3891             ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019] = backendNode;
3892 #else
3893             CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3894 #endif
3895         }
3896 
3897         for (int i = 0; i < inputsNames.size(); ++i)
3898             cvNet.connect(0, i, lid, i);
3899     }
3900 
3901     CV_TRACE_REGION_NEXT("finalize");
3902 
3903     cvNet.setPreferableBackend(getInferenceEngineBackendTypeParam());
3904 
3905     cvNet.impl->skipInfEngineInit = true;
3906     return cvNet;
3907 }
3908 #endif  // HAVE_INF_ENGINE
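// Editor's note: the import path above wraps the whole InferenceEngine network into a single
// backend node and registers one OpenCV layer per IE output, so the imported graph is executed
// by the IE / nGraph backend as a whole rather than being scheduled layer by layer.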
3909 
3910 Net Net::readFromModelOptimizer(const String& xml, const String& bin)
3911 {
3912     CV_TRACE_FUNCTION();
3913 #ifndef HAVE_INF_ENGINE
3914     CV_UNUSED(xml); CV_UNUSED(bin);
3915     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3916 #else
3917 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3918     InferenceEngine::CNNNetReader reader;
3919     reader.ReadNetwork(xml);
3920     reader.ReadWeights(bin);
3921 
3922     InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3923 #else
3924     InferenceEngine::Core& ie = getCore("");
3925     InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
3926 #endif
3927 
3928     return Impl::createNetworkFromModelOptimizer(ieNet);
3929 #endif  // HAVE_INF_ENGINE
3930 }
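// Illustrative usage sketch (editor's addition; file names are hypothetical): loading an
// OpenVINO IR pair produced by the Model Optimizer.
//
//     cv::dnn::Net net = cv::dnn::Net::readFromModelOptimizer("model.xml", "model.bin");
//     net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);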
3931 
3932 Net Net::readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights)
3933 {
3934     CV_TRACE_FUNCTION();
3935     CV_Assert(!bufferModelConfig.empty());
3936     CV_Assert(!bufferWeights.empty());
3937     return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(),
3938                                            bufferWeights.data(), bufferWeights.size());
3939 }
3940 
3941 Net Net::readFromModelOptimizer(
3942         const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
3943         const uchar* bufferWeightsPtr, size_t bufferWeightsSize
3944 )
3945 {
3946     CV_TRACE_FUNCTION();
3947 #ifndef HAVE_INF_ENGINE
3948     CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr);
3949     CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferWeightsSize);
3950     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3951 #else
3952 
3953 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3954     InferenceEngine::CNNNetReader reader;
3955 
3956     try
3957     {
3958         reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize);
3959 
3960         InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3961         InferenceEngine::TBlob<uint8_t>::Ptr weightsBlobPtr(new InferenceEngine::TBlob<uint8_t>(tensorDesc));
3962         weightsBlobPtr->allocate();
3963         std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize);
3964         reader.SetWeights(weightsBlobPtr);
3965     }
3966     catch (const std::exception& e)
3967     {
3968         CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3969     }
3970 
3971     InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3972 #else
3973     InferenceEngine::Core& ie = getCore("");
3974 
3975     std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize);
3976 
3977     InferenceEngine::CNNNetwork ieNet;
3978     try
3979     {
3980         InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3981         InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob<uint8_t>(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize);
3982 
3983         ieNet = ie.ReadNetwork(model, weights_blob);
3984     }
3985     catch (const std::exception& e)
3986     {
3987         CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3988     }
3989 #endif
3990 
3991     return Impl::createNetworkFromModelOptimizer(ieNet);
3992 #endif  // HAVE_INF_ENGINE
3993 }
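// Illustrative sketch (editor's addition): the in-memory overload is convenient when the IR has
// already been read into buffers; loadBytes() below is a hypothetical helper.
//
//     std::vector<uchar> xmlData = loadBytes("model.xml");  // IR topology
//     std::vector<uchar> binData = loadBytes("model.bin");  // IR weights
//     cv::dnn::Net net = cv::dnn::Net::readFromModelOptimizer(xmlData, binData);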
3994 
3995 
3996 Net::~Net()
3997 {
3998 }
3999 
4000 int Net::addLayer(const String &name, const String &type, LayerParams &params)
4001 {
4002     CV_TRACE_FUNCTION();
4003 
4004     if (impl->getLayerId(name) >= 0)
4005     {
4006         CV_Error(Error::StsBadArg, "Layer \"" + name + "\" is already in the net");
4007         return -1;
4008     }
4009 
4010     int id = ++impl->lastLayerId;
4011     impl->layerNameToId.insert(std::make_pair(name, id));
4012     impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
4013     if (params.get<bool>("has_dynamic_shapes", false))
4014         impl->hasDynamicShapes = true;
4015 
4016     return id;
4017 }
4018 
4019 int Net::addLayerToPrev(const String &name, const String &type, LayerParams &params)
4020 {
4021     CV_TRACE_FUNCTION();
4022 
4023     int prvLid = impl->lastLayerId;
4024     int newLid = this->addLayer(name, type, params);
4025     this->connect(prvLid, 0, newLid, 0);
4026     return newLid;
4027 }
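// Illustrative sketch of manual graph construction (editor's addition; layer name, type and
// parameters are made up and incomplete): addLayerToPrev() connects the new layer to output 0
// of the most recently added layer.
//
//     cv::dnn::Net net;
//     cv::dnn::LayerParams lp;
//     lp.set("num_output", 10);
//     int fcId = net.addLayerToPrev("fc1", "InnerProduct", lp);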
4028 
4029 void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
4030 {
4031     CV_TRACE_FUNCTION();
4032 
4033     impl->connect(outLayerId, outNum, inpLayerId, inpNum);
4034 }
4035 
4036 void Net::connect(String _outPin, String _inPin)
4037 {
4038     CV_TRACE_FUNCTION();
4039 
4040     LayerPin outPin = impl->getPinByAlias(_outPin);
4041     LayerPin inpPin = impl->getPinByAlias(_inPin);
4042 
4043     CV_Assert(outPin.valid() && inpPin.valid());
4044 
4045     impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
4046 }
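// Pin aliases passed to this overload are resolved by getPinByAlias() and roughly follow the
// "<layer_name>[.<pin>]" form, where the pin is an output index/name for the source and an
// input index for the destination; e.g. (hypothetical layer names):
//
//     net.connect("conv1.0", "relu1.0");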
4047 
4048 Mat Net::forward(const String& outputName)
4049 {
4050     CV_TRACE_FUNCTION();
4051     CV_Assert(!empty());
4052 
4053     String layerName = outputName;
4054 
4055     if (layerName.empty())
4056     {
4057         std::vector<String> layerNames = getLayerNames();
4058         CV_Assert(!layerNames.empty());
4059         layerName = layerNames.back();
4060     }
4061 
4062     std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
4063     impl->setUpNet(pins);
4064     impl->forwardToLayer(impl->getLayerData(layerName));
4065 
4066     return impl->getBlob(layerName);
4067 }
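// Typical single-output inference sketch (editor's addition; model path, input image "img" and
// preprocessing values are illustrative):
//
//     cv::dnn::Net net = cv::dnn::readNetFromONNX("model.onnx");
//     cv::Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(224, 224));
//     net.setInput(blob);
//     cv::Mat prob = net.forward();  // empty name -> run up to the last layer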
4068 
4069 AsyncArray Net::forwardAsync(const String& outputName)
4070 {
4071     CV_TRACE_FUNCTION();
4072     CV_Assert(!empty());
4073 
4074 #ifdef CV_CXX11
4075     String layerName = outputName;
4076 
4077     if (layerName.empty())
4078     {
4079         std::vector<String> layerNames = getLayerNames();
4080         CV_Assert(!layerNames.empty());
4081         layerName = layerNames.back();
4082     }
4083 
4084     std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
4085     impl->setUpNet(pins);
4086 
4087     if (!(impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
4088         CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backends only");
4089 
4090     impl->isAsync = true;
4091     impl->forwardToLayer(impl->getLayerData(layerName));
4092     impl->isAsync = false;
4093 
4094     return impl->getBlobAsync(layerName);
4095 #else
4096     CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11");
4097 #endif  // CV_CXX11
4098 }
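// Asynchronous inference sketch (editor's addition): only meaningful with an Inference Engine
// backend, as enforced above; "blob" is an already prepared input.
//
//     net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
//     net.setInput(blob);
//     cv::AsyncArray handle = net.forwardAsync();
//     cv::Mat out;
//     handle.get(out);  // blocks until the asynchronous request completes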
4099 
4100 void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
4101 {
4102     CV_TRACE_FUNCTION();
4103     CV_Assert(!empty());
4104 
4105     String layerName = outputName;
4106 
4107     if (layerName.empty())
4108     {
4109         std::vector<String> layerNames = getLayerNames();
4110         CV_Assert(!layerNames.empty());
4111         layerName = layerNames.back();
4112     }
4113 
4114     std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
4115     impl->setUpNet(pins);
4116     impl->forwardToLayer(impl->getLayerData(layerName));
4117 
4118     LayerPin pin = impl->getPinByAlias(layerName);
4119     LayerData &ld = impl->layers[pin.lid];
4120 
4121     if (outputBlobs.isUMat())
4122     {
4123         impl->getBlob(layerName).copyTo(outputBlobs);
4124     }
4125     else if (outputBlobs.isMat())
4126     {
4127         outputBlobs.assign(impl->getBlob(layerName));
4128     }
4129     else if (outputBlobs.isMatVector())
4130     {
4131         if (impl->preferableTarget != DNN_TARGET_CPU)
4132         {
4133             for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
4134             {
4135                 CV_Assert(!ld.outputBlobsWrappers[i].empty());
4136                 ld.outputBlobsWrappers[i]->copyToHost();
4137             }
4138         }
4139         if (ld.outputBlobs[0].depth() == CV_32F)
4140         {
4141             std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
4142             outputvec = ld.outputBlobs;
4143         } else {
4144             std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
4145             outputvec.resize(ld.outputBlobs.size());
4146             for (int i = 0; i < outputvec.size(); i++)
4147                 convertFp16(ld.outputBlobs[i], outputvec[i]);
4148         }
4149     }
4150     else if (outputBlobs.isUMatVector())
4151     {
4152         std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
4153 
4154 #ifdef HAVE_OPENCL
4155         if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
4156             IS_DNN_OPENCL_TARGET(impl->preferableTarget))
4157         {
4158             if (impl->preferableTarget == DNN_TARGET_OPENCL)
4159                 outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
4160             else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
4161             {
4162                 std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
4163                 outputvec.resize(out_vec.size());
4164                 for (int i = 0; i < out_vec.size(); i++)
4165                     convertFp16(out_vec[i], outputvec[i]);
4166             }
4167         }
4168         else
4169 #endif
4170         {
4171             outputvec.resize(ld.outputBlobs.size());
4172             for (int i = 0; i < outputvec.size(); ++i)
4173                 ld.outputBlobs[i].copyTo(outputvec[i]);
4174         }
4175     }
4176 }
4177 
4178 void Net::forward(OutputArrayOfArrays outputBlobs,
4179                   const std::vector<String>& outBlobNames)
4180 {
4181     CV_TRACE_FUNCTION();
4182 
4183     std::vector<LayerPin> pins;
4184     for (int i = 0; i < outBlobNames.size(); i++)
4185     {
4186         pins.push_back(impl->getPinByAlias(outBlobNames[i]));
4187     }
4188 
4189     impl->setUpNet(pins);
4190 
4191     LayerPin out = impl->getLatestLayerPin(pins);
4192 
4193     impl->forwardToLayer(impl->getLayerData(out.lid));
4194 
4195     std::vector<Mat> matvec;
4196     for (int i = 0; i < pins.size(); i++)
4197     {
4198         matvec.push_back(impl->getBlob(pins[i]));
4199     }
4200 
4201     std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
4202     outputvec = matvec;
4203 }
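// Multi-output sketch (editor's addition), typical for detection models that expose several
// unconnected output layers:
//
//     std::vector<cv::String> outNames = net.getUnconnectedOutLayersNames();
//     std::vector<cv::Mat> outs;
//     net.forward(outs, outNames);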
4204 
4205 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
4206                      const std::vector<String>& outBlobNames)
4207 {
4208     CV_TRACE_FUNCTION();
4209 
4210     std::vector<LayerPin> pins;
4211     for (int i = 0; i < outBlobNames.size(); i++)
4212     {
4213         pins.push_back(impl->getPinByAlias(outBlobNames[i]));
4214     }
4215 
4216     impl->setUpNet(pins);
4217 
4218     LayerPin out = impl->getLatestLayerPin(pins);
4219 
4220     impl->forwardToLayer(impl->getLayerData(out.lid));
4221 
4222     outputBlobs.resize(outBlobNames.size());
4223     for (int i = 0; i < outBlobNames.size(); i++)
4224     {
4225         std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
4226         outputBlobs[i].resize(lp.size());
4227         for (int j = 0; j < lp.size(); j++)
4228         {
4229             outputBlobs[i][j] = impl->getBlob(lp[j]);
4230         }
4231     }
4232 }
4233 
4234 void Net::setPreferableBackend(int backendId)
4235 {
4236     CV_TRACE_FUNCTION();
4237     CV_TRACE_ARG(backendId);
4238 
4239 #ifdef HAVE_INF_ENGINE
4240     if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
4241         backendId = getInferenceEngineBackendTypeParam();
4242 #endif
4243 
4244     if( impl->preferableBackend != backendId )
4245     {
4246         impl->preferableBackend = backendId;
4247         impl->netWasAllocated = false;
4248         impl->clear();
4249     }
4250 }
4251 
4252 void Net::setPreferableTarget(int targetId)
4253 {
4254     CV_TRACE_FUNCTION();
4255     CV_TRACE_ARG(targetId);
4256 
4257     if( impl->preferableTarget != targetId )
4258     {
4259         impl->preferableTarget = targetId;
4260         if (IS_DNN_OPENCL_TARGET(targetId))
4261         {
4262 #ifndef HAVE_OPENCL
4263 #ifdef HAVE_INF_ENGINE
4264             if (impl->preferableBackend == DNN_BACKEND_OPENCV)
4265 #else
4266             if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
4267                 impl->preferableBackend == DNN_BACKEND_OPENCV)
4268 #endif  // HAVE_INF_ENGINE
4269                 impl->preferableTarget = DNN_TARGET_CPU;
4270 #else
4271             bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
4272             if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
4273                 impl->preferableTarget = DNN_TARGET_OPENCL;
4274 #endif
4275         }
4276         impl->netWasAllocated = false;
4277         impl->clear();
4278     }
4279 }
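// Backend/target selection sketch (editor's addition); note the fallback above, e.g.
// DNN_TARGET_OPENCL_FP16 silently downgrades to DNN_TARGET_OPENCL when cl_khr_fp16 is missing.
//
//     net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
//     net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL_FP16);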
4280 
4281 void Net::setInputsNames(const std::vector<String> &inputBlobNames)
4282 {
4283     CV_TRACE_FUNCTION();
4284 
4285     impl->netInputLayer->setNames(inputBlobNames);
4286 }
4287 
4288 void Net::setInputShape(const String &inputName, const MatShape& shape)
4289 {
4290     CV_TRACE_FUNCTION();
4291 
4292     impl->netInputLayer->setInputShape(inputName, shape);
4293 }
4294 
4295 void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
4296 {
4297     CV_TRACE_FUNCTION();
4298     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
4299 
4300     LayerPin pin;
4301     pin.lid = 0;
4302     pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);
4303 
4304     if (!pin.valid())
4305         CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");
4306 
4307     Mat blob_ = blob.getMat();  // can't use InputArray directly due to MatExpr stuff
4308     MatShape blobShape = shape(blob_);
4309 
4310     if (pin.lid == 0)
4311     {
4312         CV_Assert(!impl->netInputLayer.empty());
4313         const DataLayer& netInputLayer = *impl->netInputLayer.get();
4314         if (!netInputLayer.shapes.empty())
4315         {
4316             CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), "");
4317             const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid];
4318             if (!inputShapeLimitation.empty())
4319             {
4320                 CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), "");
4321 #if 0  // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0
4322                 const size_t dims = inputShapeLimitation.size();
4323                 for (size_t dim = 0; dim < dims; dim++)
4324                 {
4325                     if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1)
4326                         continue;  // don't limit batch
4327                     CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], "");
4328                 }
4329 #endif
4330             }
4331         }
4332     }
4333 
4334     LayerData &ld = impl->layers[pin.lid];
4335     const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
4336     ld.outputBlobs.resize(numInputs);
4337     ld.outputBlobsWrappers.resize(numInputs);
4338     impl->netInputLayer->inputsData.resize(numInputs);
4339     impl->netInputLayer->scaleFactors.resize(numInputs);
4340     impl->netInputLayer->means.resize(numInputs);
4341 
4342     MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
4343     bool oldShape = prevShape == blobShape;
4344 
4345     blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
4346     if (!oldShape) {
4347         ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
4348         if (impl->hasDynamicShapes)
4349         {
4350             impl->updateLayersShapes();
4351         }
4352     }
4353 
4354     if (!ld.outputBlobsWrappers[pin.oid].empty())
4355     {
4356         ld.outputBlobsWrappers[pin.oid]->setHostDirty();
4357     }
4358     impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
4359     impl->netInputLayer->means[pin.oid] = mean;
4360     impl->netWasAllocated = impl->netWasAllocated && oldShape;
4361 }
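// setInput() stores scalefactor/mean in the DataLayer, which later normalizes the data as
// input = scalefactor * (blob - mean) per channel; a common normalization sketch
// (editor's addition, values illustrative):
//
//     net.setInput(blob, "data", 1.0 / 127.5, cv::Scalar(127.5, 127.5, 127.5));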
4362 
4363 Mat Net::getParam(LayerId layer, int numParam)
4364 {
4365     LayerData &ld = impl->getLayerData(layer);
4366     std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
4367     CV_Assert(numParam < (int)layerBlobs.size());
4368     return layerBlobs[numParam];
4369 }
4370 
4371 void Net::setParam(LayerId layer, int numParam, const Mat &blob)
4372 {
4373     LayerData &ld = impl->getLayerData(layer);
4374 
4375     std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
4376     CV_Assert(numParam < (int)layerBlobs.size());
4377     // we don't make strong checks here; use this function carefully
4378     layerBlobs[numParam] = blob;
4379 }
4380 
4381 int Net::getLayerId(const String &layer)
4382 {
4383     return impl->getLayerId(layer);
4384 }
4385 
4386 static
4387 string dumpLayerParameterSize(const string& name, const LayerParams& lp)
4388 {
4389     std::ostringstream out(name, std::ios::ate);
4390     DictValue param = lp.get(name);
4391     switch (param.size())
4392     {
4393         case 1: out << " : "; break;
4394         case 2: out << " (HxW): "; break;
4395         case 3: out << " (DxHxW): "; break;
4396         default:
4397             CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size()));
4398             out << ": ";
4399     }
4400     for (size_t i = 0; i < param.size(); i++)
4401     {
4402         if (i > 0)
4403             out << " x ";
4404         out << param.get<int>(i);
4405     }
4406     return out.str();
4407 }
4408 
4409 String Net::dump()
4410 {
4411     CV_Assert(!empty());
4412 
4413     bool hasInput = !impl->netInputLayer->inputsData.empty();
4414 
4415     if (hasInput)
4416     {
4417         if (!impl->netWasAllocated)
4418             impl->setUpNet();
4419     }
4420 
4421     return impl->dump();
4422 }
4423 
4424 string Net::Impl::dump()
4425 {
4426     bool hasInput = !netInputLayer->inputsData.empty();
4427 
4428     std::ostringstream out;
4429     const std::map<int, LayerData>& map = layers;
4430 
4431     Backend prefBackend = (Backend)preferableBackend;
4432     std::vector<std::vector<int> > skippedLayers;
4433     std::vector<int> skipId;
4434     std::vector<int> allLayers(map.size(), -1);
4435     int idPrev = -1;
4436     Ptr<BackendNode> prevNode;
4437     for (std::map<int, LayerData>::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit)
4438     {
4439         std::map<int, Ptr<BackendNode> >::const_iterator itBackend = rit->second.backendNodes.find(prefBackend);
4440         if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() ||
4441             itBackend->second.empty())
4442         {
4443                 if (rit->second.skip)
4444                     skipId.push_back(rit->first);
4445                 else if (!skipId.empty())
4446                 {
4447                     if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty())
4448                         skipId.push_back(rit->first);
4449                     else if (idPrev != -1)
4450                         skipId.push_back(idPrev);
4451 
4452                     std::sort(skipId.begin(), skipId.end());
4453                     for (int i = 0; i < skipId.size(); i++) {
4454                         allLayers[skipId[i]] = skippedLayers.size();
4455                     }
4456                     skippedLayers.push_back(skipId);
4457                     skipId.clear();
4458                 }
4459         }
4460         else
4461         {
4462             if (itBackend->second == prevNode)
4463                 skipId.push_back(idPrev);
4464             else if (!skipId.empty())
4465             {
4466                 skipId.push_back(idPrev);
4467                 std::sort(skipId.begin(), skipId.end());
4468                 for (int i = 0; i < skipId.size(); i++) {
4469                     allLayers[skipId[i]] = skippedLayers.size();
4470                 }
4471                 skippedLayers.push_back(skipId);
4472                 skipId.clear();
4473             }
4474             idPrev = rit->first;
4475             prevNode = itBackend->second;
4476         }
4477     }
4478     std::vector<string> colors = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff"};
4479     string backend;
4480     switch (prefBackend)
4481     {
4482         case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
4483         case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
4484         case DNN_BACKEND_INFERENCE_ENGINE: // fallthru
4485         case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: backend = "DLIE/"; break;
4486         case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "NGRAPH/"; break;
4487         case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
4488         case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break;
4489         case DNN_BACKEND_CUDA: backend = "CUDA/"; break;
4490         // don't use default:
4491     }
4492     out << "digraph G {\n";
4493     // Add nodes
4494     for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
4495     {
4496         const LayerData& ld = it->second;
4497         string name = ld.params.name;
4498         std::vector<int> clusterIds(1, it->first);
4499         if (allLayers[it->first] == -1 && !name.empty())
4500         {
4501             out << "\t\"" << name << "\" [label=\"";
4502         }
4503         else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0])
4504         {
4505             continue;
4506         }
4507         else // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0]
4508         {
4509             int cluster = allLayers[it->first];
4510             out << "\t\"" << "cluster_" << cluster << "\" [label=\"{";
4511             clusterIds = skippedLayers[allLayers[it->first]]; // vertices in current cluster
4512         }
4513         for (int i = 0; i < clusterIds.size(); i++)
4514         {
4515             CV_DbgAssert(map.find(clusterIds[i]) != map.end());
4516             const LayerParams& lp = map.find(clusterIds[i])->second.params;
4517             if (!lp.name.empty()) {
4518                 if (i > 0) {
4519                     out << " | ";
4520                 }
4521                 out << lp.name << "\\n" << lp.type << "\\n";  // align center
4522                 if (lp.has("kernel_size"))
4523                 {
4524                     string kernel = dumpLayerParameterSize("kernel_size", lp);
4525                     out << kernel;
4526                     out << "\\l";  // align left
4527                 } else if (lp.has("kernel_h") && lp.has("kernel_w")) {
4528                     DictValue h = lp.get("kernel_h");
4529                     DictValue w = lp.get("kernel_w");
4530                     out << "kernel (HxW): " << h << " x " << w;
4531                     out << "\\l";  // align left
4532                 }
4533                 if (lp.has("stride")) {
4534                     string stride = dumpLayerParameterSize("stride", lp);
4535                     out << stride;
4536                     out << "\\l";  // align left
4537                 } else if (lp.has("stride_h") && lp.has("stride_w")) {
4538                     DictValue h = lp.get("stride_h");
4539                     DictValue w = lp.get("stride_w");
4540                     out << "stride (HxW): " << h << " x " << w;
4541                     out << "\\l";  // align left
4542                 }
4543                 if (lp.has("dilation")) {
4544                     string dilation = dumpLayerParameterSize("dilation", lp);
4545                     out << dilation;
4546                     out << "\\l";  // align left
4547                 } else if (lp.has("dilation_h") && lp.has("dilation_w")) {
4548                     DictValue h = lp.get("dilation_h");
4549                     DictValue w = lp.get("dilation_w");
4550                     out << "dilation (HxW): " << h << " x " << w;
4551                     out << "\\l";  // align left
4552                 }
4553                 if (lp.has("pad")) {
4554                     DictValue pad = lp.get("pad");
4555                     out << "pad ";
4556                     switch (pad.size())
4557                     {
4558                         case 1: out << ": " << pad; break;
4559                         case 2:
4560                             out << "(HxW): (" << pad.get<int>(0) << " x " << pad.get<int>(1) << ")";
4561                             break;
4562                         case 4:
4563                             out << "(HxW): (" << pad.get<int>(0) << ", " << pad.get<int>(2)
4564                                 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(3) << ")";
4565                             break;
4566                         case 6:
4567                             out << "(DxHxW): (" << pad.get<int>(0) << ", " << pad.get<int>(3)
4568                                 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(4)
4569                                 << ") x (" << pad.get<int>(2) << ", " << pad.get<int>(5) << ")";
4570                             break;
4571                         default: CV_Error(Error::StsNotImplemented,  format("Unsupported pad size = %d", pad.size()));
4572                     }
4573                     out << "\\l";  // align left
4574                 } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) {
4575                     DictValue l = lp.get("pad_l");
4576                     DictValue t = lp.get("pad_t");
4577                     DictValue r = lp.get("pad_r");
4578                     DictValue b = lp.get("pad_b");
4579                     out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")";
4580                     out << "\\l";  // align left
4581                 }
4582                 else if (lp.has("pooled_w") || lp.has("pooled_h")) {
4583                     DictValue h = lp.get("pooled_h");
4584                     DictValue w = lp.get("pooled_w");
4585                     out << "pad pooled (HxW): " << h << " x " << w;
4586                     out << "\\l";  // align left
4587                 }
4588                 if (lp.has("pool")) {
4589                     out << "pool: " << lp.get("pool");
4590                     out << "\\l";  // align left
4591                 }
4592                 if (lp.has("global_pooling")) {
4593                     out << "global_pooling: " << lp.get("global_pooling");
4594                     out << "\\l";  // align left
4595                 }
4596                 if (lp.has("group")) {
4597                     out << "group: " << lp.get("group");
4598                     out << "\\l";  // align left
4599                 }
4600             }
4601         }
4602         if (!ld.outputBlobs.empty())
4603         {
4604             out << "output: " << ld.outputBlobs[0].size;
4605             out << "\\l";  // align left
4606         }
4607 
4608         Ptr<BackendNode> layerBackend;
4609         std::map<int, Ptr<BackendNode> >::const_iterator ibn = ld.backendNodes.find(prefBackend);
4610         if (ibn != ld.backendNodes.end())
4611             layerBackend = ibn->second;
4612         out << (!layerBackend.empty() ? backend : "OCV/");
4613         int colorId = 0;
4614         const Target target = ld.layerInstance.empty()
4615                               ? DNN_TARGET_CPU
4616                               : (Target)(ld.layerInstance->preferableTarget);  // TODO fix preferableTarget type
4617         switch (target)
4618         {
4619             case DNN_TARGET_CPU: out << "CPU"; colorId = layerBackend.empty() ? 0 : 5; break;
4620             case DNN_TARGET_OPENCL: out << "OCL"; colorId = 1; break;
4621             case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16"; colorId = 2; break;
4622             case DNN_TARGET_MYRIAD: out << "MYRIAD"; colorId = 3; break;
4623             case DNN_TARGET_HDDL: out << "HDDL"; colorId = 8; break;
4624             case DNN_TARGET_VULKAN: out << "VULKAN"; colorId = 7; break;
4625             case DNN_TARGET_FPGA: out << "FPGA"; colorId = 4; break;
4626             case DNN_TARGET_CUDA: out << "CUDA"; colorId = 5; break;
4627             case DNN_TARGET_CUDA_FP16: out << "CUDA_FP16"; colorId = 6; break;
4628             // don't use default:
4629         }
4630         CV_Assert(colorId < colors.size());
4631         out << "\\n";  // align center
4632         out << ((clusterIds.size() == 1)? "\" " : " }\" ");
4633         out << "fillcolor=\"" << colors[colorId] << "\" ";
4634         out << "style=filled ";
4635         out << "shape=" << ((clusterIds.size() == 1)? "box" : "record") << "]\n";
4636     }
4637     out << '\n';
4638     // Add edges
4639     int inputsSize = hasInput ? netInputLayer->outNames.size() : 0;
4640     for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
4641     {
4642         const LayerData& ld = it->second;
4643         if (allLayers[it->first] == -1)  // node
4644         {
4645             for (int i = 0; i < ld.consumers.size(); i++)
4646             {
4647                 int outId = ld.consumers[i].lid;
4648                 if (it == map.begin() && inputsSize > 1)
4649                     out << "\t\"" << ld.name << "_" << i << "\"" << " -> ";
4650                 else
4651                     out << "\t\"" << ld.name << "\"" << " -> ";
4652                 if (allLayers[outId] == -1)  // node
4653                 {
4654                     CV_DbgAssert(map.find(outId) != map.end());
4655                     out << "\"" << map.find(outId)->second.name << "\"\n";
4656                 }
4657                 else  // cluster
4658                 {
4659                     out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4660                 }
4661             }
4662         }
4663         else if (it->first == skippedLayers[allLayers[it->first]].back())  // edges from last layer in cluster
4664         {
4665             for (int i = 0; i < ld.consumers.size(); i++)
4666             {
4667                 int outId = ld.consumers[i].lid;
4668                 if (allLayers[outId] == -1) // node
4669                 {
4670                     CV_DbgAssert(map.find(outId) != map.end());
4671                     out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4672                     out << "\"" << map.find(outId)->second.name << "\"\n";
4673                 }
4674                 else if (allLayers[outId] != allLayers[it->first]) { // another cluster
4675                     out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4676                     out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4677                 }
4678             }
4679         }
4680     }
4681     out << "}\n";
4682     return out.str();
4683 }
4684 
4685 void Net::dumpToFile(const String& path) {
4686     std::ofstream file(path.c_str());
4687     file << dump();
4688     file.close();
4689 }
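// The dump is Graphviz "dot" text; a typical rendering workflow (editor's addition):
//
//     net.dumpToFile("net.dot");
//     // then from a shell:  dot -Tpng net.dot -o net.png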
4690 
4691 Ptr<Layer> Net::getLayer(LayerId layerId)
4692 {
4693     LayerData &ld = impl->getLayerData(layerId);
4694     return ld.getLayerInstance();
4695 }
4696 
4697 std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
4698 {
4699     LayerData &ld = impl->getLayerData(layerId);
4700 
4701     std::vector<Ptr<Layer> > inputLayers;
4702     inputLayers.reserve(ld.inputBlobsId.size());
4703     for (int i = 0; i < ld.inputBlobsId.size(); ++i) {
4704         inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid));
4705     }
4706     return inputLayers;
4707 }
4708 
4709 std::vector<String> Net::getLayerNames() const
4710 {
4711     CV_TRACE_FUNCTION();
4712 
4713     std::vector<String> res;
4714     res.reserve(impl->layers.size());
4715 
4716     Impl::MapIdToLayerData::iterator it;
4717     for (it = impl->layers.begin(); it != impl->layers.end(); it++)
4718     {
4719         if (it->second.id) //skip Data layer
4720             res.push_back(it->second.name);
4721     }
4722 
4723     return res;
4724 }
4725 
4726 bool Net::empty() const
4727 {
4728     return impl->layers.size() <= 1; //first layer is default Data layer
4729 }
4730 
4731 std::vector<int> Net::getUnconnectedOutLayers() const
4732 {
4733     std::vector<int> layersIds;
4734 
4735     Impl::MapIdToLayerData::iterator it;
4736     for (it = impl->layers.begin(); it != impl->layers.end(); it++)
4737     {
4738         int lid = it->first;
4739         LayerData &ld = it->second;
4740 
4741         if (ld.requiredOutputs.size() == 0)
4742             layersIds.push_back(lid);
4743     }
4744 
4745     return layersIds;
4746 }
4747 
4748 std::vector<String> Net::getUnconnectedOutLayersNames() const
4749 {
4750     std::vector<int> ids = getUnconnectedOutLayers();
4751     const size_t n = ids.size();
4752     std::vector<String> names(n);
4753     for (size_t i = 0; i < n; ++i)
4754     {
4755         names[i] = impl->layers[ids[i]].name;
4756     }
4757     return names;
4758 }
4759 
4760 void Net::getLayersShapes(const ShapesVec& netInputShapes,
4761                           std::vector<int>& layersIds,
4762                           std::vector<ShapesVec>& inLayersShapes,
4763                           std::vector<ShapesVec>& outLayersShapes) const
4764 {
4765     layersIds.clear();
4766     inLayersShapes.clear();
4767     outLayersShapes.clear();
4768 
4769     Impl::LayersShapesMap inOutShapes;
4770     impl->getLayersShapes(netInputShapes, inOutShapes);
4771 
4772     for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
4773         it != inOutShapes.end(); it++)
4774     {
4775         layersIds.push_back(it->first);
4776         inLayersShapes.push_back(it->second.in);
4777         outLayersShapes.push_back(it->second.out);
4778     }
4779 }
4780 
4781 void Net::getLayersShapes(const MatShape& netInputShape,
4782                           std::vector<int>& layerIds,
4783                           std::vector<ShapesVec>& inLayersShapes,
4784                           std::vector<ShapesVec>& outLayersShapes) const
4785 {
4786     getLayersShapes(ShapesVec(1, netInputShape),
4787                     layerIds, inLayersShapes, outLayersShapes);
4788 }
4789 
4790 void Net::getLayerShapes(const MatShape& netInputShape,
4791                          const int layerId,
4792                          ShapesVec& inLayerShapes,
4793                          ShapesVec& outLayerShapes) const
4794 {
4795     getLayerShapes(ShapesVec(1, netInputShape),
4796                    layerId, inLayerShapes, outLayerShapes);
4797 
4798 }
4799 
4800 void Net::getLayerShapes(const ShapesVec& netInputShapes,
4801                     const int layerId,
4802                     ShapesVec& inLayerShapes,
4803                     ShapesVec& outLayerShapes) const
4804 {
4805     LayerShapes shapes;
4806     impl->getLayerShapes(netInputShapes, layerId, shapes);
4807     inLayerShapes = shapes.in;
4808     outLayerShapes = shapes.out;
4809 }
4810 
4811 int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
4812 {
4813     CV_TRACE_FUNCTION();
4814 
4815     int64 flops = 0;
4816     std::vector<int> ids;
4817     std::vector<std::vector<MatShape> > inShapes, outShapes;
4818     getLayersShapes(netInputShapes, ids, inShapes, outShapes);
4819     CV_Assert(inShapes.size() == outShapes.size());
4820     CV_Assert(inShapes.size() == ids.size());
4821 
4822     for(int i = 0; i < ids.size(); i++)
4823     {
4824         flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i],
4825                                                                    outShapes[i]);
4826     }
4827 
4828     return flops;
4829 }
4830 
4831 int64 Net::getFLOPS(const MatShape& netInputShape) const
4832 {
4833     return getFLOPS(std::vector<MatShape>(1, netInputShape));
4834 }
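// FLOPs estimation sketch (editor's addition; NCHW input shape is illustrative):
//
//     cv::dnn::MatShape inputShape = {1, 3, 224, 224};
//     double gflops = net.getFLOPS(inputShape) * 1e-9;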
4835 
4836 int64 Net::getFLOPS(const int layerId,
4837               const std::vector<MatShape>& netInputShapes) const
4838 {
4839     Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
4840     CV_Assert(layer != impl->layers.end());
4841 
4842     LayerShapes shapes;
4843     impl->getLayerShapes(netInputShapes, layerId, shapes);
4844 
4845     return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
4846 }
4847 
4848 int64 Net::getFLOPS(const int layerId,
4849               const MatShape& netInputShape) const
4850 {
4851     return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
4852 }
4853 
4854 void Net::getLayerTypes(std::vector<String>& layersTypes) const
4855 {
4856     layersTypes.clear();
4857 
4858     std::map<String, int> layers;
4859     for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
4860          it != impl->layers.end(); it++)
4861     {
4862         if (layers.find(it->second.type) == layers.end())
4863             layers[it->second.type] = 0;
4864         layers[it->second.type]++;
4865     }
4866 
4867     for (std::map<String, int>::iterator it = layers.begin();
4868          it != layers.end(); it++)
4869     {
4870         layersTypes.push_back(it->first);
4871     }
4872 }
4873 
4874 int Net::getLayersCount(const String& layerType) const
4875 {
4876     int count = 0;
4877     for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
4878          it != impl->layers.end(); it++)
4879     {
4880         if (it->second.type == layerType)
4881             count++;
4882     }
4883     return count;
4884 }
4885 
4886 void Net::getMemoryConsumption(const int layerId,
4887                                const std::vector<MatShape>& netInputShapes,
4888                                size_t& weights, size_t& blobs) const
4889 {
4890     CV_TRACE_FUNCTION();
4891 
4892     Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
4893     CV_Assert(layer != impl->layers.end());
4894 
4895     weights = blobs = 0;
4896 
4897     for(int i = 0; i < layer->second.params.blobs.size(); i++)
4898     {
4899         const Mat& weightsBlob = layer->second.params.blobs[i];
4900         weights += weightsBlob.total()*weightsBlob.elemSize();
4901     }
4902 
4903     ShapesVec inLayerShapes, outLayerShapes;
4904     getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
4905     for(int i = 0; i < outLayerShapes.size(); i++)
4906     {
4907         blobs += total(outLayerShapes[i]) * sizeof(float);
4908     }
4909 }
4910 
4911 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
4912                                size_t& weights, size_t& blobs) const
4913 {
4914     CV_TRACE_FUNCTION();
4915 
4916     std::vector<int> layerIds;
4917     std::vector<size_t> w, b;
4918     getMemoryConsumption(netInputShapes, layerIds, w, b);
4919 
4920     weights = blobs = 0;
4921     for(int i = 0; i < layerIds.size(); i++)
4922     {
4923         weights += w[i];
4924         blobs += b[i];
4925     }
4926 }
4927 
4928 void Net::getMemoryConsumption(const int layerId,
4929                                const MatShape& netInputShape,
4930                                size_t& weights, size_t& blobs) const
4931 {
4932     getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
4933                          weights, blobs);
4934 }
4935 
4936 void Net::getMemoryConsumption(const MatShape& netInputShape,
4937                                size_t& weights, size_t& blobs) const
4938 {
4939     getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
4940                          weights, blobs);
4941 }
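// Memory estimation sketch (editor's addition): "weights" counts parameter storage and "blobs"
// counts intermediate output storage, both in bytes for a given input shape (illustrative).
//
//     size_t weightsBytes = 0, blobsBytes = 0;
//     net.getMemoryConsumption(cv::dnn::MatShape({1, 3, 224, 224}), weightsBytes, blobsBytes);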
4942 
4943 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
4944                                   std::vector<int>& layerIds, std::vector<size_t>& weights,
4945                                   std::vector<size_t>& blobs) const
4946 {
4947     CV_TRACE_FUNCTION();
4948 
4949     layerIds.clear();
4950     weights.clear();
4951     blobs.clear();
4952 
4953     std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
4954 
4955     getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
4956 
4957     for(int i = 0; i < layerIds.size(); i++)
4958     {
4959         size_t w = 0, b = 0;
4960         Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
4961         CV_Assert(layer != impl->layers.end());
4962 
4963         for(int j = 0; j < layer->second.params.blobs.size(); j++)
4964         {
4965             const Mat& weightsBlob = layer->second.params.blobs[j];
4966             w += weightsBlob.total()*weightsBlob.elemSize();
4967         }
4968 
4969         for(int j = 0; j < outLayerShapes[i].size(); j++)
4970         {
4971             b += total(outLayerShapes[i][j]) * sizeof(float);
4972         }
4973 
4974         weights.push_back(w);
4975         blobs.push_back(b);
4976     }
4977 }
4978 
4979 void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
4980                                std::vector<size_t>& weights, std::vector<size_t>& blobs) const
4981 {
4982     getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
4983                          weights, blobs);
4984 }
4985 
4986 void Net::enableFusion(bool fusion)
4987 {
4988     if( impl->fusion != fusion )
4989     {
4990         impl->fusion = fusion;
4991         impl->netWasAllocated = false;
4992         impl->clear();
4993     }
4994 }
4995 
4996 void Net::setHalideScheduler(const String& scheduler)
4997 {
4998     CV_TRACE_FUNCTION();
4999     CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());
5000 
5001     impl->halideConfigFile = scheduler;
5002 }
5003 
5004 int64 Net::getPerfProfile(std::vector<double>& timings)
5005 {
5006     timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
5007     int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
5008     return total;
5009 }
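// Per-layer timing sketch (editor's addition): timings are returned in tick units, so convert
// with cv::getTickFrequency() to obtain milliseconds.
//
//     std::vector<double> layerTimes;
//     double ticksPerMs = cv::getTickFrequency() / 1000.0;
//     double totalMs = net.getPerfProfile(layerTimes) / ticksPerMs;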
5010 
5011 //////////////////////////////////////////////////////////////////////////
5012 
5013 Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }
5014 
5015 Layer::Layer(const LayerParams &params)
5016     : blobs(params.blobs), name(params.name), type(params.type)
5017 {
5018     preferableTarget = DNN_TARGET_CPU;
5019 }
5020 
5021 void Layer::setParamsFrom(const LayerParams &params)
5022 {
5023     blobs = params.blobs;
5024     name = params.name;
5025     type = params.type;
5026 }
5027 
5028 int Layer::inputNameToIndex(String)
5029 {
5030     return -1;
5031 }
5032 
5033 int Layer::outputNameToIndex(const String&)
5034 {
5035     return 0;
5036 }
5037 
5038 bool Layer::supportBackend(int backendId)
5039 {
5040     return backendId == DNN_BACKEND_OPENCV;
5041 }
5042 
5043 Ptr<BackendNode> Layer::initCUDA(
5044     void*,
5045     const std::vector<Ptr<BackendWrapper>>&,
5046     const std::vector<Ptr<BackendWrapper>>&)
5047 {
5048     CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type +
5049                                        " layers is not defined.");
5050     return Ptr<BackendNode>();
5051 }
5052 
5053 Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> > &)
5054 {
5055     CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type +
5056                                        " layers is not defined.");
5057     return Ptr<BackendNode>();
5058 }
5059 
5060 Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
5061 {
5062     CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
5063                                        " layers is not defined.");
5064     return Ptr<BackendNode>();
5065 }
5066 
5067 Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
5068 {
5069     CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
5070                                        " layers is not defined.");
5071     return Ptr<BackendNode>();
5072 }
5073 
5074 Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper> > & inputs, const std::vector<Ptr<BackendNode> >& nodes)
5075 {
5076     CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
5077                                        " layers is not defined.");
5078     return Ptr<BackendNode>();
5079 }
5080 
5081 void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
5082                                  const std::vector<Mat> &outputs, int targetId) const
5083 {
5084 #ifdef  HAVE_HALIDE
5085     CV_TRACE_FUNCTION();
5086 
5087     Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
5088                 xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
5089     Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();
5090 
5091     int outW, outH, outC, outN;
5092     getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
5093 
5094     if (targetId == DNN_TARGET_CPU)
5095     {
5096         if (outW == 1 && outH == 1)
5097         {
5098             if (outC + outN == 1)
5099                 return;
5100 
5101             if (outC > 8)
5102               top.split(c, co, ci, 8)
5103                  .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
5104                  .parallel(tile)
5105                  .vectorize(ci, 8);
5106             else
5107               top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
5108                  .parallel(tile);
5109         }
5110         else
5111         {
5112             if (outH > 2)
5113             {
5114                 top.reorder(x, c, y)
5115                    .split(y, yo, yi, 2)
5116                    .fuse(yo, n, tile)
5117                    .parallel(tile)
5118                    .unroll(yi)
5119                    .vectorize(x, outW >= 16 ? 16 : outW);
5120             }
5121         }
5122     }
5123     else if (targetId == DNN_TARGET_OPENCL)
5124     {
5125         if (outW == 1 && outH == 1)
5126         {
5127             int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
5128             top.split(c, co, ci, c_split)
5129                .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
5130                .gpu_blocks(tile)
5131                .gpu_threads(ci);
5132         }
5133         else
5134         {
5135             int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
5136             int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
5137             // Supported vectorization widths: 2, 3, 4, 8, 16
5138             int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
5139             top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
5140                .split(c, co, ci, c_split)
5141                .gpu_blocks(xo, yo, co)
5142                .gpu_threads(xi, yi)
5143                .reorder(xi, yi, ci, xo, yo, co)
5144                .vectorize(ci);
5145         }
5146     }
5147     else
5148         CV_Error(Error::StsNotImplemented, "Unknown target identifier");
5149 #endif  // HAVE_HALIDE
5150 }
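
// The default scheduling heuristic above, in brief: for 1x1 spatial outputs the
// channel dimension is split (by 8 on CPU when outC > 8, by 4 or 8 on OpenCL
// depending on depth) and the remaining loops are fused into a single parallel /
// GPU-block dimension; for larger spatial outputs the CPU path parallelizes over
// fused (y, n) tiles, unrolls y and vectorizes along x, while the OpenCL path maps
// (x, y, c) tiles onto GPU blocks and threads and vectorizes along the channel
// split. Layers that need a custom schedule override applyHalideScheduler().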

Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
{
    return Ptr<BackendNode>();
}

bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
    scale = Mat();
    shift = Mat();
}

void Layer::unsetAttached()
{
    setActivation(Ptr<ActivationLayer>());
}

template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
    pv.resize(v.size());
    for (size_t i = 0; i < v.size(); i++)
        pv[i] = const_cast<T*>(&v[i]);
}

void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
{
    CV_TRACE_FUNCTION();
    this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
}

void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
{
    CV_UNUSED(input); CV_UNUSED(output);
}

void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    std::vector<Mat*> inputsp;
    vecToPVec(inputs, inputsp);
    this->finalize(inputsp, outputs);
}

std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat> outputs;
    this->finalize(inputs, outputs);
    return outputs;
}
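
// The finalize() overloads above form a chain: the vector-of-Mat variant forwards
// to the InputArrayOfArrays overload, which converts to Mat pointers via vecToPVec()
// and calls the legacy finalize(const std::vector<Mat*>&, std::vector<Mat>&), so a
// layer only needs to override one of them. A sketch of a caller (hypothetical
// "layer" object, input blobs already shaped):
//
//     std::vector<Mat> inputs = { inputBlob };
//     std::vector<Mat> outputs = layer->finalize(inputs);  // allocates/initializes outputs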

void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{
    // Kept for backward compatibility. The engine now calls this overload only to
    // support user implementations that still override the legacy signature.
}

void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}

void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        std::vector<UMat> orig_inputs;
        std::vector<UMat> orig_outputs;
        std::vector<UMat> orig_internals;

        inputs_arr.getUMatVector(orig_inputs);
        outputs_arr.getUMatVector(orig_outputs);
        internals_arr.getUMatVector(orig_internals);

        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
            convertFp16(orig_inputs[i], inputs[i]);

        outputs.resize(orig_outputs.size());
        for (size_t i = 0; i < orig_outputs.size(); i++)
            outputs[i].create(shape(orig_outputs[i]), CV_32F);

        internals.resize(orig_internals.size());
        for (size_t i = 0; i < orig_internals.size(); i++)
            internals[i].create(shape(orig_internals[i]), CV_32F);

        forward(inputs, outputs, internals);

        for (size_t i = 0; i < outputs.size(); i++)
            convertFp16(outputs[i], orig_outputs[i]);

        // sync results back
        outputs_arr.assign(orig_outputs);
        internals_arr.assign(orig_internals);
        return;
    }
    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    std::vector<Mat*> inputs(inpvec.size());
    for (int i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}
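
// forward_fallback() above is what runs when a layer has no target-specific path:
// for OPENCL_FP16 it converts the half-precision UMats (stored as CV_16S) to CV_32F,
// runs the ordinary forward(), and converts the results back; otherwise it simply
// repackages the Mats into the legacy std::vector<Mat*> signature. Custom layers
// therefore get OpenCL FP16 support "for free", at the cost of the extra conversions.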

void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
    CV_TRACE_FUNCTION();

    this->finalize(inputs, outputs);
    this->forward(inputs, outputs, internals);
}

Layer::~Layer() {}

bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
                            const int requiredOutputs,
                            std::vector<MatShape> &outputs,
                            std::vector<MatShape> &internals) const
{
    CV_Assert(inputs.size());
    outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
    return false;
}

bool Layer::updateMemoryShapes(const std::vector<MatShape> &inputs)
{
    return true;
}
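
// The default getMemoryShapes() above mirrors the first input shape into every
// required output. A shape-changing layer overrides it; a minimal sketch
// (hypothetical layer concatenating two NCHW inputs along the channel axis):
//
//     bool getMemoryShapes(const std::vector<MatShape>& inputs, const int,
//                          std::vector<MatShape>& outputs,
//                          std::vector<MatShape>&) const CV_OVERRIDE
//     {
//         CV_Assert(inputs.size() == 2);
//         MatShape out = inputs[0];
//         out[1] += inputs[1][1];   // sum the channel counts
//         outputs.assign(1, out);
//         return false;             // same as the base-class default
//     }
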
//////////////////////////////////////////////////////////////////////////

static Mutex& getLayerFactoryMutex()
{
    static Mutex* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getInitializationMutex());
        if (instance == NULL)
            instance = new Mutex();
    }
    return *instance;
}

static LayerFactory_Impl& getLayerFactoryImpl_()
{
    static LayerFactory_Impl impl;
    return impl;
}

LayerFactory_Impl& getLayerFactoryImpl()
{
    static LayerFactory_Impl* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getLayerFactoryMutex());
        if (instance == NULL)
        {
            instance = &getLayerFactoryImpl_();
            initializeLayerFactory();
        }
    }
    return *instance;
}

void LayerFactory::registerLayer(const String &type, Constructor constructor)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);

    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.back() == constructor)
            CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered");
        it->second.push_back(constructor);
    }
    // std::map::insert is a no-op for an existing key, so this only creates the
    // constructor stack for layer types that were not registered before.
    getLayerFactoryImpl().insert(std::make_pair(type, std::vector<Constructor>(1, constructor)));
}
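
// Usage sketch (the layer class "MyReluLayer" and type name "MyRelu" are
// hypothetical): user code typically registers a custom layer either through the
// CV_DNN_REGISTER_LAYER_CLASS helper from <opencv2/dnn/layer.details.hpp>, or
// directly with a static factory function that returns Ptr<Layer>:
//
//     CV_DNN_REGISTER_LAYER_CLASS(MyRelu, MyReluLayer);
//     // or, equivalently:
//     cv::dnn::LayerFactory::registerLayer("MyRelu", MyReluLayer::create);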

void LayerFactory::unregisterLayer(const String &type)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());

    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);
    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.size() > 1)
            it->second.pop_back();
        else
            getLayerFactoryImpl().erase(it);
    }
}

Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type);

    if (it != getLayerFactoryImpl().end())
    {
        CV_Assert(!it->second.empty());
        return it->second.back()(params);  // the most recently registered constructor wins
    }
    else
    {
        return Ptr<Layer>();  // NULL
    }
}

BackendNode::BackendNode(int backendId) : backendId(backendId) {}

BackendNode::~BackendNode() {}

BackendWrapper::BackendWrapper(int backendId, int targetId)
    : backendId(backendId), targetId(targetId) {}

BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::~BackendWrapper() {}

Net readNet(const String& _model, const String& _config, const String& _framework)
{
    String framework = toLowerCase(_framework);
    String model = _model;
    String config = _config;
    const std::string modelExt = model.substr(model.rfind('.') + 1);
    const std::string configExt = config.substr(config.rfind('.') + 1);
    if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
                                modelExt == "prototxt" || configExt == "prototxt")
    {
        if (modelExt == "prototxt" || configExt == "caffemodel")
            std::swap(model, config);
        return readNetFromCaffe(config, model);
    }
    if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
                                     modelExt == "pbtxt" || configExt == "pbtxt")
    {
        if (modelExt == "pbtxt" || configExt == "pb")
            std::swap(model, config);
        return readNetFromTensorflow(model, config);
    }
    if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
                                configExt == "t7" || configExt == "net")
    {
        return readNetFromTorch(model.empty() ? config : model);
    }
    if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
                                  modelExt == "cfg" || configExt == "cfg")
    {
        if (modelExt == "cfg" || configExt == "weights")
            std::swap(model, config);
        return readNetFromDarknet(config, model);
    }
    if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
                               modelExt == "xml" || configExt == "xml")
    {
        if (modelExt == "xml" || configExt == "bin")
            std::swap(model, config);
        return readNetFromModelOptimizer(config, model);
    }
    if (framework == "onnx" || modelExt == "onnx")
    {
        return readNetFromONNX(model);
    }
    CV_Error(Error::StsError, "Cannot determine an origin framework of files: " +
                                      model + (config.empty() ? "" : ", " + config));
}
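
// Usage sketch: the framework is normally inferred from the file extensions, so an
// explicit framework string is only needed for unusual file names; the swap logic
// above also makes the model/config order interchangeable. Paths are placeholders.
//
//     Net caffeNet = readNet("deploy.prototxt", "weights.caffemodel");
//     Net onnxNet  = readNet("model.onnx");
//     Net irNet    = readNet("model.xml", "model.bin");   // OpenVINO IR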

Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
            const std::vector<uchar>& bufferConfig)
{
    String framework = toLowerCase(_framework);
    if (framework == "caffe")
        return readNetFromCaffe(bufferConfig, bufferModel);
    else if (framework == "tensorflow")
        return readNetFromTensorflow(bufferModel, bufferConfig);
    else if (framework == "darknet")
        return readNetFromDarknet(bufferConfig, bufferModel);
    else if (framework == "torch")
        CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
    else if (framework == "dldt")
        return readNetFromModelOptimizer(bufferConfig, bufferModel);
    CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
}

Net readNetFromModelOptimizer(const String &xml, const String &bin)
{
    return Net::readFromModelOptimizer(xml, bin);
}

Net readNetFromModelOptimizer(const std::vector<uchar>& bufferCfg, const std::vector<uchar>& bufferModel)
{
    return Net::readFromModelOptimizer(bufferCfg, bufferModel);
}

Net readNetFromModelOptimizer(
        const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
        const uchar* bufferWeightsPtr, size_t bufferWeightsSize
)
{
    return Net::readFromModelOptimizer(
        bufferModelConfigPtr, bufferModelConfigSize,
        bufferWeightsPtr, bufferWeightsSize
    );
}
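
// Usage sketch for the raw-pointer overload, useful when the OpenVINO IR is already
// held in memory (e.g. embedded in the application); "xmlData" and "binData" are
// hypothetical buffers containing the IR description and weights:
//
//     std::vector<uchar> xmlData = /* ... */, binData = /* ... */;
//     Net net = readNetFromModelOptimizer(xmlData.data(), xmlData.size(),
//                                         binData.data(), binData.size());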

CV__DNN_INLINE_NS_END
}} // namespace
