1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
21 //
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
25 //
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "precomp.hpp"
43 #include "op_halide.hpp"
44 #include "op_inf_engine.hpp"
45 #include "ie_ngraph.hpp"
46 #include "op_vkcom.hpp"
47 #include "op_cuda.hpp"
48
49 #ifdef HAVE_CUDA
50 #include "cuda4dnn/init.hpp"
51 #include "cuda4dnn/primitives/eltwise.hpp" // required by fuseLayers
52 #endif
53
54 #include "halide_scheduler.hpp"
55
56 #include <set>
57 #include <algorithm>
58 #include <iostream>
59 #include <sstream>
60 #include <fstream>
61 #include <iterator>
62 #include <numeric>
63 #include <memory>
64 #include <opencv2/dnn/shape_utils.hpp>
65 #include <opencv2/imgproc.hpp>
66 #include <opencv2/dnn/layer_reg.private.hpp>
67
68 #include <opencv2/core/utils/configuration.private.hpp>
69 #include <opencv2/core/utils/logger.hpp>
70
71 namespace cv {
72 namespace dnn {
73 CV__DNN_INLINE_NS_BEGIN
74
75 static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0);
76
77 // this option is useful for running valgrind memory error detection
78 static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);
79
80 #ifdef HAVE_OPENCL
81 static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
82 #endif
83
84 static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
85 #ifdef HAVE_INF_ENGINE
86 (size_t)DNN_BACKEND_INFERENCE_ENGINE
87 #else
88 (size_t)DNN_BACKEND_OPENCV
89 #endif
90 );
91
92 // Additional checks (slow down execution!)
93 static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
94 static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
95 static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
96
97 bool DNN_DIAGNOSTICS_RUN = false;
98
99 void enableModelDiagnostics(bool isDiagnosticsMode)
100 {
101 DNN_DIAGNOSTICS_RUN = isDiagnosticsMode;
102 }
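// Illustrative usage (annotation, not part of the original file): diagnostics
// mode is normally switched on before importing a model so that unsupported
// layers are reported instead of aborting the import; the call sequence and
// the file name below are only a sketch.
//
//   cv::dnn::enableModelDiagnostics(true);
//   cv::dnn::Net net = cv::dnn::readNet("model.onnx");  // "model.onnx" is a placeholder
//   cv::dnn::enableModelDiagnostics(false);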
103
104 using std::vector;
105 using std::map;
106 using std::make_pair;
107 using std::set;
108 using std::string;
109
110 //==================================================================================================
111
112 class BackendRegistry
113 {
114 public:
115 typedef std::vector< std::pair<Backend, Target> > BackendsList;
116 const BackendsList & getBackends() const { return backends; }
117 static BackendRegistry & getRegistry()
118 {
119 static BackendRegistry impl;
120 return impl;
121 }
122
123 #ifdef HAVE_INF_ENGINE
124 static inline bool checkIETarget(Target target)
125 {
126 #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3)
127 // Lightweight detection
128 const std::vector<std::string> devices = getCore("").GetAvailableDevices();
129 for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
130 {
131 if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD)
132 return true;
133 if (std::string::npos != i->find("HDDL") && target == DNN_TARGET_HDDL)
134 return true;
135 else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA)
136 return true;
137 else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU)
138 return true;
139 else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
140 return true;
141 }
142 return false;
143 #else
144 cv::dnn::Net net;
145 cv::dnn::LayerParams lp;
146 lp.set("kernel_size", 1);
147 lp.set("num_output", 1);
148 lp.set("bias_term", false);
149 lp.type = "Convolution";
150 lp.name = "testLayer";
151 lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
152 net.addLayerToPrev(lp.name, lp.type, lp);
153 net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
154 net.setPreferableTarget(target);
155 static int inpDims[] = {1, 2, 3, 4};
156 net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0)));
157 try
158 {
159 net.forward();
160 }
161 catch(const std::exception& e)
162 {
163 CV_LOG_INFO(NULL, "checkIETarget(" << (int)target << ") has failed with message: " << e.what());
164 return false;
165 }
166 return true;
167 #endif
168 }
169 #endif
170
171 private:
172 BackendRegistry()
173 {
174 #ifdef HAVE_HALIDE
175 backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
176 # ifdef HAVE_OPENCL
177 if (cv::ocl::useOpenCL())
178 backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
179 # endif
180 #endif // HAVE_HALIDE
181
182 #ifdef HAVE_INF_ENGINE
183 if (checkIETarget(DNN_TARGET_CPU)) {
184 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
185 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_CPU));
186 #endif
187 #ifdef HAVE_DNN_NGRAPH
188 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
189 #endif
190 }
191 if (checkIETarget(DNN_TARGET_MYRIAD)) {
192 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
193 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_MYRIAD));
194 #endif
195 #ifdef HAVE_DNN_NGRAPH
196 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD));
197 #endif
198 }
199 if (checkIETarget(DNN_TARGET_HDDL)) {
200 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
201 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_HDDL));
202 #endif
203 #ifdef HAVE_DNN_NGRAPH
204 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL));
205 #endif
206 }
207 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
208 if (checkIETarget(DNN_TARGET_FPGA))
209 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_FPGA));
210 #endif
211 #ifdef HAVE_OPENCL
212 if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
213 {
214 if (checkIETarget(DNN_TARGET_OPENCL)) {
215 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
216 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL));
217 #endif
218 #ifdef HAVE_DNN_NGRAPH
219 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL));
220 #endif
221 }
222 if (checkIETarget(DNN_TARGET_OPENCL_FP16)) {
223 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
224 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL_FP16));
225 #endif
226 #ifdef HAVE_DNN_NGRAPH
227 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16));
228 #endif
229 }
230 }
231 #endif
232 #endif // HAVE_INF_ENGINE
233
234 #ifdef HAVE_OPENCL
235 if (cv::ocl::useOpenCL())
236 {
237 backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
238 backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
239 }
240 #endif
241
242 backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
243
244 #ifdef HAVE_VULKAN
245 if (haveVulkan())
246 backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN));
247 #endif
248
249 #ifdef HAVE_CUDA
250 if (haveCUDA())
251 {
252 backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
253 backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
254 }
255 #endif
256 }
257
258 BackendsList backends;
259 };
260
261
262 std::vector< std::pair<Backend, Target> > getAvailableBackends()
263 {
264 return BackendRegistry::getRegistry().getBackends();
265 }
266
267 std::vector<Target> getAvailableTargets(Backend be)
268 {
269 if (be == DNN_BACKEND_DEFAULT)
270 be = (Backend)PARAM_DNN_BACKEND_DEFAULT;
271 #ifdef HAVE_INF_ENGINE
272 if (be == DNN_BACKEND_INFERENCE_ENGINE)
273 be = getInferenceEngineBackendTypeParam();
274 #endif
275
276 std::vector<Target> result;
277 const BackendRegistry::BackendsList all_backends = getAvailableBackends();
278 for(BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i )
279 {
280 if (i->first == be)
281 result.push_back(i->second);
282 }
283 return result;
284 }
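// Illustrative query (annotation): callers typically use this to decide which
// target to pass to Net::setPreferableTarget(); the backend chosen below is
// just an example.
//
//   std::vector<Target> targets = getAvailableTargets(DNN_BACKEND_OPENCV);
//   bool hasOpenCL = std::find(targets.begin(), targets.end(),
//                              DNN_TARGET_OPENCL) != targets.end();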
285
286 //==================================================================================================
287
288 namespace
289 {
290 typedef std::vector<MatShape> ShapesVec;
291
292 struct LayerShapes
293 {
294 ShapesVec in, out, internal;
295 // There is no guarantee that a layer which supports in-place computation
296 // will actually be computed in-place (input.data_ptr == output.data_ptr).
297 // If the layer reports that it can work in-place and the subsequent layers
298 // no longer use the input blob, we set output = input.
299 bool supportInPlace;
300 LayerShapes() {supportInPlace = false;}
301 };
302 }
303
304 Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
305 const Scalar& mean, bool swapRB, bool crop, int ddepth)
306 {
307 CV_TRACE_FUNCTION();
308 Mat blob;
309 blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
310 return blob;
311 }
312
313 void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
314 const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
315 {
316 CV_TRACE_FUNCTION();
317 std::vector<Mat> images(1, image.getMat());
318 blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
319 }
320
321 Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
322 const Scalar& mean, bool swapRB, bool crop, int ddepth)
323 {
324 CV_TRACE_FUNCTION();
325 Mat blob;
326 blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
327 return blob;
328 }
329
330 void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
331 Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
332 {
333 CV_TRACE_FUNCTION();
334 CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
335 if (ddepth == CV_8U)
336 {
337 CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
338 CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
339 }
340
341 std::vector<Mat> images;
342 images_.getMatVector(images);
343 CV_Assert(!images.empty());
344 for (size_t i = 0; i < images.size(); i++)
345 {
346 Size imgSize = images[i].size();
347 if (size == Size())
348 size = imgSize;
349 if (size != imgSize)
350 {
351 if(crop)
352 {
353 float resizeFactor = std::max(size.width / (float)imgSize.width,
354 size.height / (float)imgSize.height);
355 resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
356 Rect crop(Point(0.5 * (images[i].cols - size.width),
357 0.5 * (images[i].rows - size.height)),
358 size);
359 images[i] = images[i](crop);
360 }
361 else
362 resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
363 }
364 if(images[i].depth() == CV_8U && ddepth == CV_32F)
365 images[i].convertTo(images[i], CV_32F);
366 Scalar mean = mean_;
367 if (swapRB)
368 std::swap(mean[0], mean[2]);
369
370 images[i] -= mean;
371 images[i] *= scalefactor;
372 }
373
374 size_t nimages = images.size();
375 Mat image0 = images[0];
376 int nch = image0.channels();
377 CV_Assert(image0.dims == 2);
378 if (nch == 3 || nch == 4)
379 {
380 int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
381 blob_.create(4, sz, ddepth);
382 Mat blob = blob_.getMat();
383 Mat ch[4];
384
385 for(size_t i = 0; i < nimages; i++ )
386 {
387 const Mat& image = images[i];
388 CV_Assert(image.depth() == blob_.depth());
389 nch = image.channels();
390 CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
391 CV_Assert(image.size() == image0.size());
392
393 for( int j = 0; j < nch; j++ )
394 ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
395 if(swapRB)
396 std::swap(ch[0], ch[2]);
397 split(image, ch);
398 }
399 }
400 else
401 {
402 CV_Assert(nch == 1);
403 int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
404 blob_.create(4, sz, ddepth);
405 Mat blob = blob_.getMat();
406
407 for(size_t i = 0; i < nimages; i++ )
408 {
409 const Mat& image = images[i];
410 CV_Assert(image.depth() == blob_.depth());
411 nch = image.channels();
412 CV_Assert(image.dims == 2 && (nch == 1));
413 CV_Assert(image.size() == image0.size());
414
415 image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
416 }
417 }
418 }
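// Illustrative call (annotation; the file name and preprocessing constants are
// placeholders, not values prescribed by this module):
//
//   Mat img = imread("input.jpg");
//   Mat blob = blobFromImage(img, 1.0 / 255.0, Size(224, 224),
//                            Scalar(104, 117, 123), /*swapRB=*/true, /*crop=*/false);
//   // blob is a 4D NCHW Mat of shape 1 x 3 x 224 x 224 with depth CV_32F.
//   // As implemented above, the mean is subtracted before scaling:
//   // dst = (src - mean) * scalefactor.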
419
420 void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
421 {
422 CV_TRACE_FUNCTION();
423
424 // A blob is a 4-dimensional matrix in floating-point precision
425 //blob_[0] = batchSize = nbOfImages
426 //blob_[1] = nbOfChannels
427 //blob_[2] = height
428 //blob_[3] = width
429 CV_Assert(blob_.depth() == CV_32F);
430 CV_Assert(blob_.dims == 4);
431
432 images_.create(cv::Size(1, blob_.size[0]), blob_.depth());
433
434 std::vector<Mat> vectorOfChannels(blob_.size[1]);
435 for (int n = 0; n < blob_.size[0]; ++n)
436 {
437 for (int c = 0; c < blob_.size[1]; ++c)
438 {
439 vectorOfChannels[c] = getPlane(blob_, n, c);
440 }
441 cv::merge(vectorOfChannels, images_.getMatRef(n));
442 }
443 }
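// Illustrative round trip (annotation): a blob produced by blobFromImages() can
// be unpacked again with imagesFromBlob(); imgA and imgB are hypothetical inputs.
//
//   std::vector<Mat> imgs = {imgA, imgB};
//   Mat blob = blobFromImages(imgs, 1.0, Size(64, 64));
//   std::vector<Mat> restored;
//   imagesFromBlob(blob, restored);   // restored.size() == 2; each Mat is 64x64 with blob.size[1] channels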
444
445 #ifdef HAVE_OPENCL
446 class OpenCLBackendWrapper : public BackendWrapper
447 {
448 public:
449 OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
450 {
451 m.copyTo(umat);
452 host = &m;
453 hostDirty = false;
454 }
455
456 OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
457 : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
458 {
459 Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
460 CV_Assert(!base.empty());
461
462 host = &m;
463
464 int shape[] = {1, (int)base->umat.total()};
465 umat = base->umat.reshape(1, 2, &shape[0])
466 .colRange(0, host->total())
467 .reshape(1, host->dims, &host->size[0]);
468 hostDirty = false;
469 }
470
471 static Ptr<BackendWrapper> create(Mat& m)
472 {
473 return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
474 }
475
476 static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
477 {
478 return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
479 }
480
481 static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
482 {
483 const int numWrappers = wrappers.size();
484 std::vector<UMat> mats(wrappers.size());
485 for (int i = 0; i < numWrappers; ++i)
486 {
487 Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
488 CV_Assert(!umatWrapper.empty());
489 umatWrapper->copyToDevice();
490 mats[i] = umatWrapper->umat;
491 }
492 return mats;
493 }
494
495 // Replaces all umats in the wrappers with the given ones.
496 static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
497 const std::vector<UMat>& umats)
498 {
499 CV_Assert(wrappers.size() == umats.size());
500 for (int i = 0, n = umats.size(); i < n; ++i)
501 {
502 Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
503 CV_Assert(!umatWrapper.empty());
504 umatWrapper->umat = umats[i];
505 }
506 }
507
508 ~OpenCLBackendWrapper() {}
509
510 // Copies data from device to a host memory.
511 virtual void copyToHost() CV_OVERRIDE
512 {
513 umat.copyTo(*host);
514 }
515
516 virtual void setHostDirty() CV_OVERRIDE
517 {
518 hostDirty = true;
519 };
520
521 void copyToDevice()
522 {
523 if (hostDirty)
524 {
525 host->copyTo(umat);
526 hostDirty = false;
527 }
528 }
529
530 private:
531 UMat umat;
532 Mat* host;
533 bool hostDirty;
534 };
535 #endif
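// Note (annotation): OpenCLBackendWrapper copies lazily: setHostDirty() only
// marks the host Mat as modified, and the actual Mat -> UMat upload happens in
// copyToDevice(), which getUMatVector() invokes right before OpenCL kernels run.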
536
537 struct LayerPin
538 {
539 int lid;
540 int oid;
541
542 LayerPin(int layerId = -1, int outputId = -1)
543 : lid(layerId), oid(outputId) {}
544
545 bool valid() const
546 {
547 return (lid >= 0 && oid >= 0);
548 }
549
550 bool equal(const LayerPin &r) const
551 {
552 return (lid == r.lid && oid == r.oid);
553 }
554
555 bool operator<(const LayerPin &r) const
556 {
557 return lid < r.lid || (lid == r.lid && oid < r.oid);
558 }
559
560 bool operator ==(const LayerPin &r) const
561 {
562 return lid == r.lid && oid == r.oid;
563 }
564 };
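// Note (annotation): LayerPin is used as a std::map key by BlobManager below,
// so operator< must provide a strict weak ordering; pins compare first by
// layer id and then by output id, e.g. LayerPin(1, 2) < LayerPin(2, 0).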
565
566 struct LayerData
567 {
568 LayerData() : id(-1), skip(false), flag(0) {}
569 LayerData(int _id, const String &_name, const String &_type, LayerParams &_params)
570 : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)
571 {
572 CV_TRACE_FUNCTION();
573
574 //add logging info
575 params.name = name;
576 params.type = type;
577 }
578
579 int id;
580 String name;
581 String type;
582 LayerParams params;
583
584 std::vector<LayerPin> inputBlobsId;
585 std::set<int> inputLayersId;
586 std::set<int> requiredOutputs;
587 std::vector<LayerPin> consumers;
588 std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
589 std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
590 std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;
591
592 #ifdef HAVE_CUDA
593 /* output ids which must be transferred to the host in the background
594 * after the completion of the forward pass of the layer
595 */
596 std::vector<int> cudaD2HBackgroundTransfers;
597 #endif
598
599 Ptr<Layer> layerInstance;
600 std::vector<Mat> outputBlobs;
601 std::vector<Mat*> inputBlobs;
602 std::vector<Mat> internals;
603 // Computation nodes of implemented backends (except DEFAULT).
604 std::map<int, Ptr<BackendNode> > backendNodes;
605 // Flag to skip this layer's computation for a specific backend.
606 bool skip;
607
608 int flag;
609
610 Ptr<Layer> getLayerInstance()
611 {
612 CV_TRACE_FUNCTION();
613 CV_TRACE_ARG_VALUE(type, "type", type.c_str());
614
615 if (layerInstance)
616 return layerInstance;
617
618 layerInstance = LayerFactory::createLayerInstance(type, params);
619 if (!layerInstance)
620 {
621 CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
622 }
623
624 return layerInstance;
625 }
626 };
627
628 // Fake layer containing the network input blobs
629 struct DataLayer : public Layer
630 {
631 DataLayer() : Layer()
632 {
633 skip = false;
634 }
635
636 virtual bool supportBackend(int backendId) CV_OVERRIDE
637 {
638 return backendId == DNN_BACKEND_OPENCV ||
639 (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && inputsData.size() == 1);
640 }
641
642 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
643 {
644 CV_TRACE_FUNCTION();
645 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
646
647 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
648 forward_ocl(inputs_arr, outputs_arr, internals_arr))
649
650 if (outputs_arr.depth() == CV_16S)
651 {
652 forward_fallback(inputs_arr, outputs_arr, internals_arr);
653 return;
654 }
655
656 std::vector<Mat> outputs, internals;
657 outputs_arr.getMatVector(outputs);
658 internals_arr.getMatVector(internals);
659
660 // Supported modes:
661 // | Input type | Output type |
662 // | fp32 | fp32 |
663 // | uint8 | fp32 |
664 for (int i = 0; i < inputsData.size(); ++i)
665 {
666 double scale = scaleFactors[i];
667 Scalar& mean = means[i];
668 CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
669 CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");
670
671 bool singleMean = true;
672 for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
673 {
674 singleMean = mean[j] == mean[j - 1];
675 }
676
677 if (singleMean)
678 {
679 inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
680 }
681 else
682 {
683 for (int n = 0; n < inputsData[i].size[0]; ++n)
684 for (int c = 0; c < inputsData[i].size[1]; ++c)
685 {
686 Mat inp = getPlane(inputsData[i], n, c);
687 Mat out = getPlane(outputs[i], n, c);
688 inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
689 }
690 }
691 }
692 }
693
694 #ifdef HAVE_OPENCL
695 std::vector<Mat> tmp_expressions;
696 bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
697 {
698 // Supported modes:
699 // | Input type | Output type |
700 // | fp32 | fp32 |
701 // | fp32 | fp16 |
702 // | uint8 | fp32 |
703 std::vector<UMat> outputs;
704 outputs_.getUMatVector(outputs);
705
706 tmp_expressions.clear();
707 for (int i = 0; i < inputsData.size(); ++i)
708 {
709 Mat inputData = inputsData[i];
710
711 double scale = scaleFactors[i];
712 Scalar& mean = means[i];
713
714 CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
715 bool singleMean = true;
716 for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
717 {
718 singleMean = mean[j] == mean[j - 1];
719 }
720
721 if (outputs_.depth() == CV_16S)
722 {
723 if (singleMean)
724 {
725 tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
726 convertFp16(tmp_expressions.back(), outputs[i]);
727 }
728 else
729 {
730 for (int n = 0; n < inputsData[i].size[0]; ++n)
731 for (int c = 0; c < inputsData[i].size[1]; ++c)
732 {
733 Mat inp = getPlane(inputsData[i], n, c);
734
735 std::vector<cv::Range> plane(4, Range::all());
736 plane[0] = Range(n, n + 1);
737 plane[1] = Range(c, c + 1);
738 UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
739
740 tmp_expressions.push_back(scale * (inp - mean[c]));
741 convertFp16(tmp_expressions.back(), out);
742 }
743 }
744 }
745 else
746 {
747 CV_Assert(outputs_.depth() == CV_32F);
748 if (singleMean)
749 {
750 inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
751 }
752 else
753 {
754 for (int n = 0; n < inputsData[i].size[0]; ++n)
755 for (int c = 0; c < inputsData[i].size[1]; ++c)
756 {
757 Mat inp = getPlane(inputsData[i], n, c);
758
759 std::vector<cv::Range> plane(4, Range::all());
760 plane[0] = Range(n, n + 1);
761 plane[1] = Range(c, c + 1);
762 UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
763
764 inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
765 }
766 }
767 }
768 }
769 return true;
770 }
771 #endif
772
773 int outputNameToIndex(const String& tgtName) CV_OVERRIDE
774 {
775 int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
776 return (idx < (int)outNames.size()) ? idx : -1;
777 }
778
779 void setNames(const std::vector<String> &names)
780 {
781 outNames.assign(names.begin(), names.end());
782 shapes.clear(); shapes.resize(outNames.size());
783 }
784
785 void setInputShape(const String& tgtName, const MatShape& shape)
786 {
787 std::vector<String>::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName);
788 CV_Check(tgtName, it != outNames.end(), "Unknown input");
789 int idx = (int)(it - outNames.begin());
790
791 CV_Assert(idx < (int)shapes.size());
792 CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed");
793 shapes[idx] = shape;
794 }
795
796 bool getMemoryShapes(const std::vector<MatShape> &inputs,
797 const int requiredOutputs,
798 std::vector<MatShape> &outputs,
799 std::vector<MatShape> &internals) const CV_OVERRIDE
800 {
801 CV_Assert(inputs.size() == requiredOutputs);
802 outputs.assign(inputs.begin(), inputs.end());
803 return false;
804 }
805
806 virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
807 {
808 std::vector<Mat> outputs;
809 outputs_arr.getMatVector(outputs);
810
811 CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
812 inputsData.size() == outputs.size());
813 skip = true;
814 for (int i = 0; skip && i < inputsData.size(); ++i)
815 {
816 if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
817 skip = false;
818 }
819 }
820
821 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
822 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
823 {
824 CV_CheckEQ(inputsData.size(), (size_t)1, "");
825 CV_CheckEQ(inputsData[0].dims, 4, "");
826 const size_t numChannels = inputsData[0].size[1];
827 CV_Assert(numChannels <= 4);
828
829 // Scale
830 InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels},
831 InferenceEngine::Layout::C);
832 auto weights = InferenceEngine::make_shared_blob<float>(td);
833 weights->allocate();
834
835 float* weight_buf = weights->buffer().as<float*>();
836 std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]);
837
838 // Mean subtraction
839 auto biases = InferenceEngine::make_shared_blob<float>(td);
840 biases->allocate();
841 float* bias_buf = biases->buffer().as<float*>();
842
843 for (int i = 0; i < numChannels; ++i)
844 {
845 bias_buf[i] = -means[0][i] * scaleFactors[0];
846 }
847
848 InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
849 addConstantData("weights", weights, ieLayer);
850 addConstantData("biases", biases, ieLayer);
851 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
852 }
853 #endif // HAVE_DNN_IE_NN_BUILDER_2019
854
855 std::vector<String> outNames;
856 std::vector<MatShape> shapes;
857 // Preprocessing parameters for each network's input.
858 std::vector<double> scaleFactors;
859 std::vector<Scalar> means;
860 std::vector<Mat> inputsData;
861 bool skip;
862 };
863
864 struct BlobManager
865 {
866 public:
867 // Increase the reference counter for a layer output.
868 void addReference(const LayerPin& lp)
869 {
870 std::map<LayerPin, int>::iterator it = refCounter.find(lp);
871 if (it == refCounter.end())
872 refCounter[lp] = 1;
873 else
874 it->second += 1;
875 }
876
877 void addReferences(const std::vector<LayerPin>& pins)
878 {
879 for (int i = 0; i < pins.size(); i++)
880 {
881 addReference(pins[i]);
882 }
883 }
884
885 // Returns the number of references to the allocated memory that is used
886 // by the specified layer blob.
887 int numReferences(const LayerPin& lp)
888 {
889 std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
890 CV_Assert(mapIt != reuseMap.end());
891 LayerPin memHost = mapIt->second;
892
893 std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
894 CV_Assert(refIt != refCounter.end());
895 return refIt->second;
896 }
897
898 // Reuse data allocated in <host> inside the <user> blob.
899 void reuse(const LayerPin& host, const LayerPin& user)
900 {
901 CV_Assert(reuseMap.find(user) == reuseMap.end());
902 CV_Assert(reuseMap.find(host) != reuseMap.end());
903 LayerPin memHost = reuseMap[host];
904 reuseMap[user] = memHost;
905 if (refCounter.find(memHost) != refCounter.end())
906 {
907 std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
908 if (userRefIt != refCounter.end())
909 {
910 refCounter[memHost] += userRefIt->second;
911 refCounter.erase(userRefIt);
912 }
913 else
914 refCounter[memHost] += 1;
915 }
916 }
917
918 // Decrease the reference counter of the memory allocated for the specified blob.
919 void releaseReference(const LayerPin& lp)
920 {
921 std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
922 CV_Assert(mapIt != reuseMap.end());
923
924 std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
925 CV_Assert(refIt != refCounter.end());
926 CV_Assert(refIt->second > 0);
927 refIt->second -= 1;
928 }
929
930 void releaseReferences(const std::vector<LayerPin>& pins)
931 {
932 for (int i = 0; i < pins.size(); i++)
933 {
934 releaseReference(pins[i]);
935 }
936 }
937
938 void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
939 {
940 if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
941 {
942 Mat bestBlob;
943 LayerPin bestBlobPin;
944
945 std::map<LayerPin, Mat>::iterator hostIt;
946 std::map<LayerPin, int>::iterator refIt;
947
948 const int targetTotal = total(shape);
949 int bestBlobTotal = INT_MAX;
950
951 for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
952 {
953 refIt = refCounter.find(hostIt->first);
954 // Reuse only blobs that have been reference-counted before; a blob without
955 // a counter may be a network output and must not be reused.
956 if (refIt != refCounter.end() && refIt->second == 0)
957 {
958 Mat& unusedBlob = hostIt->second;
959 if (unusedBlob.total() >= targetTotal &&
960 unusedBlob.total() < bestBlobTotal)
961 {
962 bestBlobPin = hostIt->first;
963 bestBlob = unusedBlob;
964 bestBlobTotal = unusedBlob.total();
965 }
966 }
967 }
968 if (!bestBlob.empty())
969 {
970 reuse(bestBlobPin, lp);
971 dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
972 return;
973 }
974 }
975
976 {
977 // If dst has already been allocated with total(shape) elements,
978 // it won't be recreated and the dst.data pointer remains the same.
979 dst.create(shape, use_half ? CV_16S : CV_32F);
980 addHost(lp, dst);
981 }
982 }
983
984 void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
985 std::vector<LayerPin>& pinsForInternalBlobs,
986 bool use_half = false)
987 {
988 CV_TRACE_FUNCTION();
989
990 pinsForInternalBlobs.clear();
991
992 std::vector<Mat>& outputBlobs = ld.outputBlobs,
993 &internalBlobs = ld.internals;
994
995 const ShapesVec& outShapes = layerShapes.out,
996 internalShapes = layerShapes.internal;
997
998 outputBlobs.resize(std::max((size_t)1, outShapes.size())); // a layer produces at least one output blob
999 internalBlobs.resize(internalShapes.size());
1000
1001 CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
1002
1003 // Check that layer could work in-place.
1004 bool inPlace = false;
1005 if (layerShapes.supportInPlace)
1006 {
1007 if (ld.inputBlobs.size() == 1)
1008 {
1009 // Get number of references to the input memory.
1010 int numRef = numReferences(ld.inputBlobsId[0]);
1011 // In-place is possible only if the current layer is the one and only consumer of this blob.
1012 inPlace = numRef == 1;
1013 }
1014 }
1015
1016 ShapesVec shapes(outShapes);
1017 shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
1018 std::vector<Mat*> blobs;
1019 for(int i = 0; i < outputBlobs.size(); i++)
1020 {
1021 blobs.push_back(&outputBlobs[i]);
1022 }
1023
1024 for(int i = 0; i < internalBlobs.size(); i++)
1025 {
1026 blobs.push_back(&internalBlobs[i]);
1027 if (total(internalShapes[i]))
1028 {
1029 pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
1030 }
1031 }
1032
1033 addReferences(pinsForInternalBlobs);
1034
1035 std::map<int, std::vector<int> > idxSizes;
1036 for(int i = 0; i < shapes.size(); i++)
1037 {
1038 idxSizes[total(shapes[i])].push_back(i);
1039 }
1040
1041 std::map<int, std::vector<int> >::reverse_iterator it;
1042 for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
1043 {
1044 for(int j = 0; j < it->second.size(); j++)
1045 {
1046 int index = it->second[j];
1047 if (total(shapes[index]))
1048 {
1049 LayerPin blobPin(ld.id, index);
1050 if (index < outShapes.size() && inPlace)
1051 {
1052 CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
1053 ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
1054 reuse(ld.inputBlobsId[0], blobPin);
1055 }
1056 else
1057 reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
1058 }
1059 }
1060 }
1061 }
1062
1063 // Clear internal state. Called before every reallocation.
1064 void reset()
1065 {
1066 CV_TRACE_FUNCTION();
1067
1068 refCounter.clear();
1069 reuseMap.clear();
1070 memHosts.clear();
1071 }
1072
1073 private:
1074 // Register allocated memory.
1075 void addHost(const LayerPin& lp, const Mat& mat)
1076 {
1077 CV_Assert(memHosts.find(lp) == memHosts.end());
1078 reuseMap[lp] = lp;
1079 memHosts[lp] = mat;
1080 }
1081
1082 std::map<LayerPin, int> refCounter;
1083 // Maps a pin to its origin blob (the pin for which the memory was originally allocated).
1084 // For origin blobs, key == value.
1085 std::map<LayerPin, LayerPin> reuseMap;
1086 std::map<LayerPin, Mat> memHosts;
1087 };
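// Worked example (annotation, simplified): if layer A's output pin pA is consumed
// only by an in-place layer B, allocateBlobsForLayer() for B sees
// numReferences(pA) == 1 and lets B's output alias A's memory via reuse(pA, pB).
// Once every consumer of a pin has called releaseReference() and its counter
// reaches 0, reuseOrCreate() may hand the same host Mat to a later layer.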
1088
1089 static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
1090 {
1091 if (backendId == DNN_BACKEND_OPENCV)
1092 {
1093 if (targetId == DNN_TARGET_CPU)
1094 return Ptr<BackendWrapper>();
1095 #ifdef HAVE_OPENCL
1096 else if (IS_DNN_OPENCL_TARGET(targetId))
1097 return OpenCLBackendWrapper::create(m);
1098 #endif
1099 else
1100 CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier");
1101 }
1102 else if (backendId == DNN_BACKEND_HALIDE)
1103 {
1104 CV_Assert(haveHalide());
1105 #ifdef HAVE_HALIDE
1106 return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
1107 #endif // HAVE_HALIDE
1108 }
1109 else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1110 {
1111 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1112 return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
1113 #else
1114 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
1115 #endif
1116 }
1117 else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1118 {
1119 #ifdef HAVE_DNN_NGRAPH
1120 return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
1121 #else
1122 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
1123 #endif
1124 }
1125 else if (backendId == DNN_BACKEND_VKCOM)
1126 {
1127 CV_Assert(haveVulkan());
1128 #ifdef HAVE_VULKAN
1129 return Ptr<BackendWrapper>(new VkComBackendWrapper(m));
1130 #endif // HAVE_VULKAN
1131 }
1132 else if (backendId == DNN_BACKEND_CUDA)
1133 {
1134 CV_Assert(haveCUDA());
1135
1136 #ifdef HAVE_CUDA
1137 switch (targetId)
1138 {
1139 case DNN_TARGET_CUDA:
1140 return CUDABackendWrapperFP32::create(m);
1141 case DNN_TARGET_CUDA_FP16:
1142 return CUDABackendWrapperFP16::create(m);
1143 default:
1144 CV_Assert(IS_DNN_CUDA_TARGET(targetId));
1145 }
1146 #endif
1147 }
1148 else
1149 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1150 return Ptr<BackendWrapper>(); // TODO Error?
1151 }
1152
1153 static int g_networkId = 0;
1154
1155 detail::NetImplBase::NetImplBase()
1156 : networkId(CV_XADD(&g_networkId, 1))
1157 , networkDumpCounter(0)
1158 , dumpLevel(DNN_NETWORK_DUMP)
1159 {
1160 // nothing
1161 }
1162
1163 std::string detail::NetImplBase::getDumpFileNameBase()
1164 {
1165 std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
1166 return dumpFileNameBase;
1167 }
1168
1169 struct Net::Impl : public detail::NetImplBase
1170 {
1171 typedef std::map<int, LayerShapes> LayersShapesMap;
1172 typedef std::map<int, LayerData> MapIdToLayerData;
1173
1174 Impl()
1175 {
1176 //allocate fake net input layer
1177 netInputLayer = Ptr<DataLayer>(new DataLayer());
1178 LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
1179 inpl.id = 0;
1180 netInputLayer->name = inpl.name = "_input";
1181 inpl.type = "__NetInputLayer__";
1182 inpl.layerInstance = netInputLayer;
1183 layerNameToId.insert(std::make_pair(inpl.name, inpl.id));
1184
1185 lastLayerId = 0;
1186 netWasAllocated = false;
1187 fusion = true;
1188 isAsync = false;
1189 preferableBackend = DNN_BACKEND_DEFAULT;
1190 preferableTarget = DNN_TARGET_CPU;
1191 skipInfEngineInit = false;
1192 hasDynamicShapes = false;
1193 }
1194
1195 Ptr<DataLayer> netInputLayer;
1196 std::vector<LayerPin> blobsToKeep;
1197 MapIdToLayerData layers;
1198 std::map<String, int> layerNameToId;
1199 BlobManager blobManager;
1200 int preferableBackend;
1201 int preferableTarget;
1202 String halideConfigFile;
1203 bool skipInfEngineInit;
1204 bool hasDynamicShapes;
1205 // Map host data to backend specific wrapper.
1206 std::map<void*, Ptr<BackendWrapper> > backendWrappers;
1207
1208 int lastLayerId;
1209
1210 bool netWasAllocated;
1211 bool fusion;
1212 bool isAsync;
1213 std::vector<int64> layersTimings;
1214 Mat output_blob;
1215
1216 #ifdef HAVE_CUDA
1217 struct CudaInfo_t
1218 {
1219 CudaInfo_t(cuda4dnn::csl::CSLContext ctxt, cuda4dnn::csl::Stream d2h_stream_)
1220 : context(std::move(ctxt)), d2h_stream(std::move(d2h_stream_)) { }
1221 cuda4dnn::csl::CSLContext context;
1222 cuda4dnn::csl::Stream d2h_stream;
1223 cuda4dnn::csl::Workspace workspace;
1224 };
1225
1226 std::unique_ptr<CudaInfo_t> cudaInfo;
1227 #endif
1228
1229 Ptr<BackendWrapper> wrap(Mat& host)
1230 {
1231 if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
1232 return Ptr<BackendWrapper>();
1233
1234 MatShape shape(host.dims);
1235 for (int i = 0; i < host.dims; ++i)
1236 shape[i] = host.size[i];
1237
1238 void* data = host.data;
1239 if (backendWrappers.find(data) != backendWrappers.end())
1240 {
1241 Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
1242 if (preferableBackend == DNN_BACKEND_OPENCV)
1243 {
1244 #ifdef HAVE_OPENCL
1245 CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
1246 return OpenCLBackendWrapper::create(baseBuffer, host);
1247 #else
1248 CV_Error(Error::StsInternal, "");
1249 #endif
1250 }
1251 else if (preferableBackend == DNN_BACKEND_HALIDE)
1252 {
1253 CV_Assert(haveHalide());
1254 #ifdef HAVE_HALIDE
1255 return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
1256 #endif
1257 }
1258 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1259 {
1260 return wrapMat(preferableBackend, preferableTarget, host);
1261 }
1262 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1263 {
1264 return wrapMat(preferableBackend, preferableTarget, host);
1265 }
1266 else if (preferableBackend == DNN_BACKEND_VKCOM)
1267 {
1268 #ifdef HAVE_VULKAN
1269 return Ptr<BackendWrapper>(new VkComBackendWrapper(baseBuffer, host));
1270 #endif
1271 }
1272 else if (preferableBackend == DNN_BACKEND_CUDA)
1273 {
1274 CV_Assert(haveCUDA());
1275 #ifdef HAVE_CUDA
1276 switch (preferableTarget)
1277 {
1278 case DNN_TARGET_CUDA:
1279 return CUDABackendWrapperFP32::create(baseBuffer, shape);
1280 case DNN_TARGET_CUDA_FP16:
1281 return CUDABackendWrapperFP16::create(baseBuffer, shape);
1282 default:
1283 CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
1284 }
1285 #endif
1286 }
1287 else
1288 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1289 }
1290
1291 Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
1292 backendWrappers[data] = wrapper;
1293 return wrapper;
1294 }
1295
1296 #ifdef HAVE_HALIDE
1297 void compileHalide()
1298 {
1299 CV_TRACE_FUNCTION();
1300
1301 CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
1302
1303 HalideScheduler scheduler(halideConfigFile);
1304 std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
1305 for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
1306 {
1307 LayerData &ld = it->second;
1308 Ptr<Layer> layer = ld.layerInstance;
1309 if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
1310 {
1311 CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
1312 bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
1313 if (!scheduled)
1314 {
1315 // Use automatic scheduling provided by layer.
1316 layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
1317 ld.inputBlobs, ld.outputBlobs,
1318 preferableTarget);
1319 }
1320 compileList.emplace_back(ld);
1321 }
1322 }
1323 std::atomic<int> progress(0);
1324 auto fn = ([&] () -> void
1325 {
1326 for (;;)
1327 {
1328 int id = progress.fetch_add(1);
1329 if ((size_t)id >= compileList.size())
1330 return;
1331 const LayerData& ld = compileList[id].get();
1332 Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
1333 dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
1334 }
1335 });
1336 size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
1337 num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
1338 std::vector<std::thread> threads(num_threads - 1);
1339 for (auto& t: threads) t = std::thread(fn);
1340 fn(); // process own tasks
1341 for (auto& t: threads) t.join();
1342 }
1343 #endif
1344
1345 void clear()
1346 {
1347 CV_TRACE_FUNCTION();
1348
1349 MapIdToLayerData::iterator it;
1350 for (it = layers.begin(); it != layers.end(); it++)
1351 {
1352 if (it->second.id != 0) {
1353 it->second.inputBlobs.clear();
1354 it->second.outputBlobs.clear();
1355 it->second.internals.clear();
1356 }
1357 it->second.skip = false;
1358 //it->second.consumers.clear();
1359 Ptr<Layer> currLayer = it->second.layerInstance;
1360
1361 if( currLayer.empty() )
1362 continue;
1363
1364 currLayer->unsetAttached();
1365 }
1366
1367 layersTimings.clear();
1368 }
1369
1370 void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
1371 {
1372 CV_TRACE_FUNCTION();
1373
1374 if (dumpLevel && networkDumpCounter == 0)
1375 {
1376 dumpNetworkToFile();
1377 }
1378
1379 if (preferableBackend == DNN_BACKEND_DEFAULT)
1380 preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;
1381 #ifdef HAVE_INF_ENGINE
1382 if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
1383 preferableBackend = getInferenceEngineBackendTypeParam();
1384 #endif
1385
1386 CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
1387 preferableTarget == DNN_TARGET_CPU ||
1388 preferableTarget == DNN_TARGET_OPENCL ||
1389 preferableTarget == DNN_TARGET_OPENCL_FP16);
1390 CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
1391 preferableTarget == DNN_TARGET_CPU ||
1392 preferableTarget == DNN_TARGET_OPENCL);
1393 #ifdef HAVE_INF_ENGINE
1394 if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
1395 preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1396 {
1397 CV_Assert(
1398 (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) ||
1399 preferableTarget == DNN_TARGET_OPENCL ||
1400 preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1401 preferableTarget == DNN_TARGET_MYRIAD ||
1402 preferableTarget == DNN_TARGET_HDDL ||
1403 preferableTarget == DNN_TARGET_FPGA
1404 );
1405 }
1406 #endif
1407 CV_Assert(preferableBackend != DNN_BACKEND_VKCOM ||
1408 preferableTarget == DNN_TARGET_VULKAN);
1409 CV_Assert(preferableBackend != DNN_BACKEND_CUDA ||
1410 IS_DNN_CUDA_TARGET(preferableTarget));
1411 if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
1412 {
1413 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
1414 #ifndef HAVE_OPENCL
1415 {
1416 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
1417 preferableTarget = DNN_TARGET_CPU;
1418 }
1419 #else
1420 {
1421 if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
1422 {
1423 // Current implementation is only valid for GPU (#11494)
1424 if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
1425 {
1426 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
1427 preferableTarget = DNN_TARGET_CPU;
1428 }
1429 else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
1430 {
1431 CV_LOG_WARNING(NULL,
1432 "DNN: OpenCL target with fp16 precision is not supported "
1433 "with current OpenCL device (tested with Intel GPUs only), "
1434 "switching to OpenCL with fp32 precision.");
1435 preferableTarget = DNN_TARGET_OPENCL;
1436 }
1437 }
1438 }
1439 #endif
1440 if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan())
1441 {
1442 preferableBackend = DNN_BACKEND_OPENCV;
1443 preferableTarget = DNN_TARGET_CPU;
1444 }
1445
1446 if (preferableBackend == DNN_BACKEND_CUDA && !haveCUDA())
1447 {
1448 #ifdef HAVE_CUDA
1449 CV_LOG_WARNING(NULL, "unable to use CUDA backend; switching to CPU");
1450 #else
1451 CV_LOG_WARNING(NULL, "DNN module was not built with CUDA backend; switching to CPU");
1452 #endif
1453 preferableBackend = DNN_BACKEND_OPENCV;
1454 preferableTarget = DNN_TARGET_CPU;
1455 }
1456
1457 clear();
1458
1459 this->blobsToKeep = blobsToKeep_;
1460
1461 allocateLayers(blobsToKeep_);
1462
1463 MapIdToLayerData::iterator it = layers.find(0);
1464 CV_Assert(it != layers.end());
1465 it->second.skip = netInputLayer->skip;
1466
1467 initBackend(blobsToKeep_);
1468
1469 if (!netWasAllocated)
1470 {
1471 #ifdef HAVE_HALIDE
1472 if (preferableBackend == DNN_BACKEND_HALIDE)
1473 compileHalide();
1474 #else
1475 CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
1476 #endif
1477 }
1478
1479 netWasAllocated = true;
1480
1481 if (dumpLevel)
1482 {
1483 dumpNetworkToFile();
1484 }
1485 }
1486 }
1487
1488 int getLayerId(const String &layerName)
1489 {
1490 std::map<String, int>::iterator it = layerNameToId.find(layerName);
1491 return (it != layerNameToId.end()) ? it->second : -1;
1492 }
1493
1494 int getLayerId(int id)
1495 {
1496 MapIdToLayerData::iterator it = layers.find(id);
1497 return (it != layers.end()) ? id : -1;
1498 }
1499
1500 int getLayerId(DictValue &layerDesc)
1501 {
1502 if (layerDesc.isInt())
1503 return getLayerId(layerDesc.get<int>());
1504 else if (layerDesc.isString())
1505 return getLayerId(layerDesc.get<String>());
1506
1507 CV_Assert(layerDesc.isInt() || layerDesc.isString());
1508 return -1;
1509 }
1510
1511 String getLayerName(int id)
1512 {
1513 MapIdToLayerData::iterator it = layers.find(id);
1514 return (it != layers.end()) ? it->second.name : "(unknown layer)";
1515 }
1516
1517 LayerData& getLayerData(int id)
1518 {
1519 MapIdToLayerData::iterator it = layers.find(id);
1520
1521 if (it == layers.end())
1522 CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
1523
1524 return it->second;
1525 }
1526
1527 LayerData& getLayerData(const String &layerName)
1528 {
1529 int id = getLayerId(layerName);
1530
1531 if (id < 0)
1532 CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");
1533
1534 return getLayerData(id);
1535 }
1536
1537 LayerData& getLayerData(const DictValue &layerDesc)
1538 {
1539 CV_Assert(layerDesc.isInt() || layerDesc.isString());
1540 if (layerDesc.isInt())
1541 return getLayerData(layerDesc.get<int>());
1542 else /*if (layerDesc.isString())*/
1543 return getLayerData(layerDesc.get<String>());
1544 }
1545
1546 static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
1547 {
1548 if ((int)ld.inputBlobsId.size() <= inNum)
1549 {
1550 ld.inputBlobsId.resize(inNum + 1);
1551 }
1552 else
1553 {
1554 LayerPin storedFrom = ld.inputBlobsId[inNum];
1555 if (storedFrom.valid() && !storedFrom.equal(from))
1556 CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected",
1557 inNum, ld.name.c_str()));
1558 }
1559
1560 ld.inputBlobsId[inNum] = from;
1561 }
1562
1563 int resolvePinOutputName(LayerData &ld, const String &outName)
1564 {
1565 if (outName.empty())
1566 return 0;
1567 return ld.getLayerInstance()->outputNameToIndex(outName);
1568 }
1569
1570 LayerPin getPinByAlias(const String &layerName)
1571 {
1572 LayerPin pin;
1573 pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1574
1575 if (pin.lid >= 0)
1576 pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
1577
1578 return pin;
1579 }
1580
1581 std::vector<LayerPin> getLayerOutPins(const String &layerName)
1582 {
1583 int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1584
1585 std::vector<LayerPin> pins;
1586
1587 for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
1588 {
1589 pins.push_back(LayerPin(lid, i));
1590 }
1591
1592 return pins;
1593 }
1594
1595 void connect(int outLayerId, int outNum, int inLayerId, int inNum)
1596 {
1597 CV_Assert(outLayerId < inLayerId);
1598 LayerData &ldOut = getLayerData(outLayerId);
1599 LayerData &ldInp = getLayerData(inLayerId);
1600
1601 addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
1602 ldOut.requiredOutputs.insert(outNum);
1603 ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
1604 }
1605
1606 void initBackend(const std::vector<LayerPin>& blobsToKeep_)
1607 {
1608 CV_TRACE_FUNCTION();
1609 if (preferableBackend == DNN_BACKEND_OPENCV)
1610 {
1611 CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
1612 }
1613 else if (preferableBackend == DNN_BACKEND_HALIDE)
1614 initHalideBackend();
1615 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
1616 {
1617 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1618 initInfEngineBackend(blobsToKeep_);
1619 #else
1620 CV_Assert(false && "This OpenCV version is built without Inference Engine NN Builder API support");
1621 #endif
1622 }
1623 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
1624 {
1625 #ifdef HAVE_DNN_NGRAPH
1626 initNgraphBackend(blobsToKeep_);
1627 #else
1628 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
1629 #endif
1630 }
1631 else if (preferableBackend == DNN_BACKEND_VKCOM)
1632 initVkComBackend();
1633 else if (preferableBackend == DNN_BACKEND_CUDA)
1634 initCUDABackend(blobsToKeep_);
1635 else
1636 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1637 }
1638
1639 void initHalideBackend()
1640 {
1641 CV_TRACE_FUNCTION();
1642 CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());
1643
1644 // Iterator to current layer.
1645 MapIdToLayerData::iterator it = layers.begin();
1646 // Iterator to the base layer for fusion. For example, in the case of conv+bn+relu
1647 // it will be the conv layer.
1648 MapIdToLayerData::iterator baseIt = layers.begin();
1649 for (; it != layers.end(); it++)
1650 {
1651 LayerData &ldTop = it->second;
1652 Ptr<Layer> layerTop = ldTop.layerInstance;
1653 if (!layerTop->supportBackend(preferableBackend))
1654 {
1655 // Move the base iterator to the layer that doesn't support the preferable
1656 // backend, to prevent fusion across layers of different backends.
1657 baseIt = it;
1658 continue;
1659 }
1660 // Try to do layers fusion.
1661 LayerData &ldBot = baseIt->second;
1662 Ptr<Layer> layerBot = ldBot.layerInstance;
1663 // 1. Check that the bottom and top layers are from the same backend.
1664 if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
1665 {
1666 // 2. Check that current layer works in-place.
1667 bool inPlace = ldTop.inputBlobs.size() == 1 &&
1668 ldBot.outputBlobs.size() == 1 &&
1669 ldTop.inputBlobs[0]->data ==
1670 ldBot.outputBlobs[0].data;
1671 if (inPlace)
1672 {
1673 // 3. Try to attach node.
1674 CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
1675 Ptr<BackendNode> fusedNode =
1676 layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
1677 if (!fusedNode.empty())
1678 {
1679 ldTop.skip = true;
1680 ldBot.backendNodes[preferableBackend] = fusedNode;
1681 ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
1682 continue;
1683 }
1684 }
1685 }
1686 // No layers fusion.
1687 ldTop.skip = false;
1688 ldTop.backendNodes[DNN_BACKEND_HALIDE] =
1689 layerTop->initHalide(ldTop.inputBlobsWrappers);
1690 baseIt = it;
1691 }
1692 }
1693
1694 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
1695 // Before launching the Inference Engine graph we need to specify the output blobs.
1696 // This function requests output blobs based on input references of
1697 // layers from the default backend or layers from different graphs.
1698 void addInfEngineNetOutputs(LayerData &ld)
1699 {
1700 CV_TRACE_FUNCTION();
1701 Ptr<InfEngineBackendNet> layerNet;
1702 if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
1703 {
1704 Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1705 if (!node.empty())
1706 {
1707 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1708 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1709 layerNet = ieNode->net;
1710 }
1711 }
1712 // For every input reference we check whether it belongs to one of
1713 // the Inference Engine backend graphs, and request an output blob if it does.
1714 // Do nothing if the layer's input is from the same graph.
1715 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1716 {
1717 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1718 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1719 if (!inpNode.empty())
1720 {
1721 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1722 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1723 if (layerNet != ieInpNode->net)
1724 {
1725 // layerNet is empty or nodes are from different graphs.
1726 ieInpNode->net->addOutput(ieInpNode->layer.getName());
1727 }
1728 }
1729 }
1730 }
1731
1732 void initInfEngineBackend(const std::vector<LayerPin>& blobsToKeep_)
1733 {
1734 CV_TRACE_FUNCTION();
1735 CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine());
1736 MapIdToLayerData::iterator it;
1737 Ptr<InfEngineBackendNet> net;
1738
1739 for (it = layers.begin(); it != layers.end(); ++it)
1740 {
1741 LayerData &ld = it->second;
1742 if (ld.id == 0)
1743 {
1744 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
1745 (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
1746 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1747 {
1748 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1749 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1750 dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
1751 #else
1752 dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]);
1753 #endif
1754 }
1755 }
1756 else
1757 {
1758 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1759 {
1760 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1761 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1762 dataPtr->name = ld.name;
1763 #else
1764 dataPtr->setName(ld.name);
1765 #endif
1766 }
1767 }
1768 }
1769
1770 if (skipInfEngineInit)
1771 {
1772 Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
1773 CV_Assert(!node.empty());
1774
1775 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1776 CV_Assert(!ieNode.empty());
1777 ieNode->net->reset();
1778
1779 for (it = layers.begin(); it != layers.end(); ++it)
1780 {
1781 LayerData &ld = it->second;
1782 if (ld.id == 0)
1783 {
1784 for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
1785 {
1786 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
1787 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1788 dataPtr->name = netInputLayer->outNames[i];
1789 #else
1790 dataPtr->setName(netInputLayer->outNames[i]);
1791 #endif
1792 }
1793 }
1794 else
1795 {
1796 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
1797 {
1798 InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
1799 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
1800 dataPtr->name = ld.name;
1801 #else
1802 dataPtr->setName(ld.name);
1803 #endif
1804 }
1805 }
1806 ieNode->net->addBlobs(ld.inputBlobsWrappers);
1807 ieNode->net->addBlobs(ld.outputBlobsWrappers);
1808 ld.skip = true;
1809 }
1810 layers[lastLayerId].skip = false;
1811 ieNode->net->init((Target)preferableTarget);
1812 return;
1813 }
1814
1815         // Build Inference Engine networks from sets of layers that support this
1816         // backend. Split the whole model into several Inference Engine networks if
1817         // some of the layers are not implemented.
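        // For instance (a schematic sketch; layer names are illustrative), a model where only
        // "customLayer" has no Inference Engine implementation is executed as
        //
        //   [IE subnet #1: conv1 -> relu1 -> pool1] -> customLayer (default backend) -> [IE subnet #2: fc -> softmax]
        //
        // with each bracketed group becoming a separate InfEngineBackendNet.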
1818
1819 bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
1820 BackendRegistry::checkIETarget(DNN_TARGET_CPU);
1821
1822 // Set of all input and output blobs wrappers for current network.
1823 std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
1824 for (it = layers.begin(); it != layers.end(); ++it)
1825 {
1826 LayerData &ld = it->second;
1827 if (ld.id == 0 && ld.skip)
1828 continue;
1829 bool fused = ld.skip;
1830
1831 Ptr<Layer> layer = ld.layerInstance;
1832 if (!fused && !layer->supportBackend(preferableBackend))
1833 {
1834 bool customizable = ld.id != 0 &&
1835 INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) &&
1836 supportsCPUFallback;
1837 // TODO: there is a bug in Myriad plugin with custom layers shape infer.
1838 if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
1839 {
1840 for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
1841 {
1842 customizable = ld.inputBlobs[i]->size[0] == 1;
1843 }
1844 }
1845
1846 // TODO: fix these workarounds
1847 if (preferableTarget == DNN_TARGET_MYRIAD ||
1848 preferableTarget == DNN_TARGET_HDDL ||
1849 preferableTarget == DNN_TARGET_OPENCL ||
1850 preferableTarget == DNN_TARGET_OPENCL_FP16)
1851 customizable &= ld.type != "Concat";
1852
1853 if (preferableTarget == DNN_TARGET_OPENCL ||
1854 preferableTarget == DNN_TARGET_OPENCL_FP16)
1855 customizable &= ld.type != "Power";
1856
1857 if (preferableTarget == DNN_TARGET_OPENCL)
1858 customizable &= ld.type != "Eltwise";
1859
1860 if (!customizable)
1861 {
1862 addInfEngineNetOutputs(ld);
1863 net = Ptr<InfEngineBackendNet>();
1864                     netBlobsWrappers.clear(); // Not used for the R5 release, but we don't wrap it in #ifdef.
1865 layer->preferableTarget = DNN_TARGET_CPU;
1866 continue;
1867 }
1868 }
1869 ld.skip = true; // Initially skip all Inference Engine supported layers.
1870
1871             // Create a new network if one of the inputs comes from a different Inference Engine graph.
1872 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1873 {
1874 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1875 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1876 if (!inpNode.empty())
1877 {
1878 Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1879 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1880 if (ieInpNode->net != net)
1881 {
1882 net = Ptr<InfEngineBackendNet>();
1883                         netBlobsWrappers.clear(); // Not used for the R5 release, but we don't wrap it in #ifdef.
1884 break;
1885 }
1886 }
1887 }
1888
1889 Ptr<BackendNode> node;
1890 if (!net.empty())
1891 {
1892 if (fused)
1893 {
1894 bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
1895 ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
1896 CV_Assert(inPlace);
1897 node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
1898 ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
1899 }
1900 }
1901 else
1902 net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());
1903
1904 if (!fused)
1905 {
1906 if (layer->supportBackend(preferableBackend))
1907 node = layer->initInfEngine(ld.inputBlobsWrappers);
1908 else
1909 {
1910 node = Ptr<BackendNode>(new InfEngineBackendNode(
1911 ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
1912 }
1913 }
1914 else if (node.empty())
1915 continue;
1916
1917 CV_Assert(!node.empty());
1918 ld.backendNodes[preferableBackend] = node;
1919
1920 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1921 CV_Assert(!ieNode.empty());
1922 ieNode->net = net;
1923
1924 for (const auto& pin : blobsToKeep_)
1925 {
1926 if (pin.lid == ld.id)
1927 {
1928 ieNode->net->addOutput(ieNode->layer.getName());
1929 break;
1930 }
1931 }
1932
1933 // Convert weights in FP16 for specific targets.
1934 if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
1935 preferableTarget == DNN_TARGET_MYRIAD ||
1936 preferableTarget == DNN_TARGET_HDDL ||
1937 preferableTarget == DNN_TARGET_FPGA) && !fused)
1938 {
1939 #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
1940 for (const std::string& name : {"weights", "biases"})
1941 {
1942 auto it = ieNode->layer.getParameters().find(name);
1943 if (it != ieNode->layer.getParameters().end())
1944 {
1945 InferenceEngine::Blob::Ptr bp = it->second.as<InferenceEngine::Blob::Ptr>();
1946 it->second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(bp));
1947 }
1948 }
1949 #else
1950 auto& blobs = ieNode->layer.getConstantData();
1951 if (blobs.empty())
1952 {
1953                 // In case of a non-weightable layer we have to specify
1954                 // its precision by adding a dummy blob.
1955 auto blob = InferenceEngine::make_shared_blob<int16_t>(
1956 InferenceEngine::Precision::FP16,
1957 InferenceEngine::Layout::C, {1});
1958 blob->allocate();
1959 blobs[""] = blob;
1960 }
1961 else
1962 {
1963 for (auto& it : blobs)
1964 it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));
1965 }
1966 #endif
1967 }
1968
1969 if (!fused)
1970 net->addLayer(ieNode->layer);
1971
1972 net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
1973 net->addBlobs(ld.inputBlobsWrappers);
1974 net->addBlobs(ld.outputBlobsWrappers);
1975 addInfEngineNetOutputs(ld);
1976 }
1977
1978 // Initialize all networks.
1979 for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
1980 {
1981 LayerData &ld = it->second;
1982 if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
1983 continue;
1984
1985 Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1986 if (node.empty())
1987 continue;
1988
1989 Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1990 if (ieNode.empty())
1991 continue;
1992
1993 CV_Assert(!ieNode->net.empty());
1994
1995 if (!ieNode->net->isInitialized())
1996 {
1997 ieNode->net->init((Target)preferableTarget);
1998 ld.skip = false;
1999 }
2000 }
2001 }
2002 #endif // HAVE_DNN_IE_NN_BUILDER_2019
2003
2004
2005 #ifdef HAVE_DNN_NGRAPH
2006     void addNgraphOutputs(LayerData &ld)
2007 {
2008 CV_TRACE_FUNCTION();
2009
2010 Ptr<InfEngineNgraphNet> layerNet;
2011 auto it = ld.backendNodes.find(preferableBackend);
2012 if (it != ld.backendNodes.end())
2013 {
2014 Ptr<BackendNode> node = it->second;
2015 if (!node.empty())
2016 {
2017 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2018 CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
2019 layerNet = ieNode->net;
2020 }
2021 }
2022
2023 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2024 {
2025 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2026 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2027 if (!inpNode.empty())
2028 {
2029 Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2030 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
2031 if (layerNet != ieInpNode->net)
2032 {
2033 ieInpNode->net->addOutput(ieInpNode->node->get_friendly_name());
2034 ieInpNode->net->setUnconnectedNodes(ieInpNode);
2035 }
2036 }
2037 }
2038 }
2039
2040     void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
2041 {
2042 CV_TRACE_FUNCTION();
2043 CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine());
2044
2045 MapIdToLayerData::iterator it;
2046 Ptr<InfEngineNgraphNet> net;
2047
2048 for (it = layers.begin(); it != layers.end(); ++it)
2049 {
2050 LayerData &ld = it->second;
2051 if (ld.id == 0)
2052 {
2053 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
2054 (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
2055 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2056 {
2057 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2058 std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
2059 outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName;
2060 dataPtr->setName(outputName);
2061 }
2062 }
2063 else
2064 {
2065 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2066 {
2067 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2068 std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name;
2069 dataPtr->setName(outputName);
2070 }
2071 }
2072 }
2073
2074 if (skipInfEngineInit)
2075 {
2076 Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
2077 CV_Assert(!node.empty());
2078
2079 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2080 CV_Assert(!ieNode.empty());
2081 ieNode->net->reset();
2082
2083 for (it = layers.begin(); it != layers.end(); ++it)
2084 {
2085 LayerData &ld = it->second;
2086 if (ld.id == 0)
2087 {
2088 for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
2089 {
2090 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]);
2091 dataPtr->setName(netInputLayer->outNames[i]);
2092 }
2093 }
2094 else
2095 {
2096 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2097 {
2098 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2099 dataPtr->setName(ld.name);
2100 }
2101 }
2102 ieNode->net->addBlobs(ld.inputBlobsWrappers);
2103 ieNode->net->addBlobs(ld.outputBlobsWrappers);
2104 ld.skip = true;
2105 }
2106 layers[lastLayerId].skip = false;
2107 ieNode->net->init((Target)preferableTarget);
2108 return;
2109 }
2110
2111 bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU ||
2112 BackendRegistry::checkIETarget(DNN_TARGET_CPU));
2113
2114         // Build Inference Engine networks from sets of layers that support this
2115         // backend. Split the whole model into several Inference Engine networks if
2116         // some of the layers are not implemented.
2117 for (it = layers.begin(); it != layers.end(); ++it)
2118 {
2119 LayerData &ld = it->second;
2120
2121 if (ld.id == 0 && ld.skip)
2122 continue;
2123
2124 bool fused = ld.skip;
2125 Ptr<Layer> layer = ld.layerInstance;
2126 if (!fused && !layer->supportBackend(preferableBackend))
2127 {
2128 bool customizable = ld.id != 0 && supportsCPUFallback;
2129
2130 // TODO: there is a bug in Myriad plugin with custom layers shape infer.
2131 if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
2132 {
2133 for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
2134 {
2135 customizable = ld.inputBlobs[i]->size[0] == 1;
2136 }
2137 }
2138
2139 // TODO: fix these workarounds
2140 if (preferableTarget == DNN_TARGET_MYRIAD ||
2141 preferableTarget == DNN_TARGET_HDDL ||
2142 preferableTarget == DNN_TARGET_OPENCL ||
2143 preferableTarget == DNN_TARGET_OPENCL_FP16)
2144 customizable &= ld.type != "Concat";
2145
2146 if (preferableTarget == DNN_TARGET_OPENCL ||
2147 preferableTarget == DNN_TARGET_OPENCL_FP16)
2148 customizable &= ld.type != "Power";
2149
2150 if (preferableTarget == DNN_TARGET_OPENCL)
2151 customizable &= ld.type != "Eltwise";
2152
2153 if (!customizable)
2154 {
2155 addNgraphOutputs(ld);
2156 net = Ptr<InfEngineNgraphNet>();
2157 layer->preferableTarget = DNN_TARGET_CPU;
2158
2159 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2160 {
2161 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2162 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2163 if (!inpNode.empty()) {
2164 Ptr<InfEngineNgraphNode> ieNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2165 CV_Assert(!ieNode.empty());
2166 ieNode->net->setUnconnectedNodes(ieNode);
2167 }
2168 }
2169 continue;
2170 }
2171 }
2172 ld.skip = true; // Initially skip all Inference Engine supported layers.
2173
2174             // Create a new network if one of the inputs comes from a different Inference Engine graph.
2175 std::vector<Ptr<BackendNode>> inputNodes;
2176 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2177 {
2178 // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois)
2179 if (inputNodes.size() == ld.inputBlobsId.size()) {
2180 break;
2181 }
2182 LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
2183 Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
2184 if (!inpNode.empty())
2185 {
2186 Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
2187 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
2188 if (ieInpNode->net == net && !fused) {
2189 inputNodes.push_back(inpNode);
2190 continue;
2191 }
2192 }
2193
2194 if (net.empty()) {
2195 net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
2196 }
2197
2198 if (!fused) {
2199 std::vector<std::string> inputNames;
2200 std::vector<cv::Mat> inputs;
2201
2202 auto curr_pos = inpLd.consumers.begin();
2203 auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; };
2204 auto cons = curr_pos;
2205 while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) !=
2206 inpLd.consumers.end()) {
2207 int cons_inp = cons->oid;
2208 Ptr<NgraphBackendWrapper> inpWrapper = inpLd.outputBlobsWrappers[cons_inp].
2209 dynamicCast<NgraphBackendWrapper>();
2210 CV_Assert(!inpWrapper.empty());
2211 auto iter = std::find(inputNames.begin(), inputNames.end(),
2212 inpWrapper->dataPtr->getName());
2213 if (iter == inputNames.end()) {
2214 inputNames.push_back(inpWrapper->dataPtr->getName());
2215 inputs.push_back(inpLd.outputBlobs[cons_inp]);
2216 }
2217 curr_pos = cons + 1;
2218 }
2219
2220 auto inps = net->setInputs(inputs, inputNames);
2221 for (auto& inp : inps) {
2222 inputNodes.emplace_back(Ptr<BackendNode>(new InfEngineNgraphNode(inp)));
2223 }
2224 }
2225 }
2226
2227 Ptr<BackendNode> node;
2228 if (!net.empty())
2229 {
2230 if (fused)
2231 {
2232 bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
2233 ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
2234 CV_Assert(inPlace);
2235 node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
2236 ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
2237 }
2238 }
2239 else {
2240 net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
2241 }
2242
2243 if (!fused)
2244 {
2245 CV_Assert(ld.inputBlobsId.size() == inputNodes.size());
2246 for (int i = 0; i < ld.inputBlobsId.size(); ++i)
2247 {
2248 int lid = ld.inputBlobsId[i].lid;
2249 int oid = ld.inputBlobsId[i].oid;
2250 if (oid == 0 || lid == 0)
2251 continue;
2252
2253 auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
2254 CV_Assert(oid < ieInpNode->node->get_output_size());
2255 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
2256 inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node));
2257 #elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3)
2258 inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid)));
2259 #else
2260 inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false)));
2261 #endif
2262 }
2263
2264 if (layer->supportBackend(preferableBackend))
2265 {
2266 node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes);
2267 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
2268 {
2269 InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
2270 node.dynamicCast<InfEngineNgraphNode>()->setName(dataPtr->getName());
2271 }
2272 }
2273 else
2274 {
2275 node = Ptr<BackendNode>(new InfEngineNgraphNode(inputNodes,
2276 ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
2277 }
2278 }
2279 else if (node.empty())
2280 continue;
2281
2282 ld.backendNodes[preferableBackend] = node;
2283
2284 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2285 CV_Assert(!ieNode.empty());
2286 ieNode->net = net;
2287
2288 if (ld.consumers.empty()) {
2289 // TF EAST_text_detection
2290 ieNode->net->setUnconnectedNodes(ieNode);
2291 }
2292 for (const auto& pin : blobsToKeep_)
2293 {
2294 if (pin.lid == ld.id)
2295 {
2296 ieNode->net->addOutput(ieNode->node->get_friendly_name());
2297 break;
2298 }
2299 }
2300 ieNode->net->setNodePtr(&ieNode->node);
2301
2302 net->addBlobs(ld.inputBlobsWrappers);
2303 net->addBlobs(ld.outputBlobsWrappers);
2304 addNgraphOutputs(ld);
2305 }
2306
2307 // Initialize all networks.
2308 for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
2309 {
2310 LayerData &ld = it->second;
2311 auto iter = ld.backendNodes.find(preferableBackend);
2312 if (iter == ld.backendNodes.end())
2313 continue;
2314
2315 Ptr<BackendNode>& node = iter->second;
2316 if (node.empty())
2317 continue;
2318
2319 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
2320 if (ieNode.empty())
2321 continue;
2322
2323 CV_Assert(!ieNode->net.empty());
2324
2325 if (!ieNode->net->isInitialized())
2326 {
2327 ieNode->net->setUnconnectedNodes(ieNode);
2328 ieNode->net->createNet((Target)preferableTarget);
2329 ld.skip = false;
2330 }
2331 }
2332 }
2333 #endif // HAVE_DNN_NGRAPH
2334
2335     void initVkComBackend()
2336 {
2337 CV_TRACE_FUNCTION();
2338 CV_Assert(preferableBackend == DNN_BACKEND_VKCOM);
2339 #ifdef HAVE_VULKAN
2340 if (!haveVulkan())
2341 return;
2342
2343 MapIdToLayerData::iterator it = layers.begin();
2344 for (; it != layers.end(); it++)
2345 {
2346 LayerData &ld = it->second;
2347 Ptr<Layer> layer = ld.layerInstance;
2348 if (!layer->supportBackend(preferableBackend))
2349 {
2350 continue;
2351 }
2352
2353 ld.skip = false;
2354
2355 try
2356 {
2357 ld.backendNodes[DNN_BACKEND_VKCOM] =
2358 layer->initVkCom(ld.inputBlobsWrappers);
2359 }
2360 catch (const cv::Exception& e)
2361 {
2362 CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what());
2363 ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>();
2364 }
2365 }
2366 #endif
2367 }
2368
2369     void initCUDABackend(const std::vector<LayerPin>& blobsToKeep_)
2370 {
2371 CV_Assert(haveCUDA());
2372 CV_Assert(preferableBackend == DNN_BACKEND_CUDA);
2373
2374 #ifdef HAVE_CUDA
2375 if (!cudaInfo) /* we need to check only once */
2376 cuda4dnn::checkVersions();
2377
2378 if (cuda4dnn::getDeviceCount() <= 0)
2379 CV_Error(Error::StsError, "No CUDA capable device found.");
2380
2381 if (cuda4dnn::getDevice() < 0)
2382 CV_Error(Error::StsError, "No CUDA capable device selected.");
2383
2384 if (!cuda4dnn::isDeviceCompatible())
2385 CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration.");
2386
2387 if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16())
2388 {
2389 CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target.");
2390 preferableTarget = DNN_TARGET_CUDA;
2391 }
2392
2393 if (!cudaInfo)
2394 {
2395 cuda4dnn::csl::CSLContext context;
2396 context.stream = cuda4dnn::csl::Stream(true);
2397 context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream);
2398 context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream);
2399
2400 auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers
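            // The dedicated stream is intended to let copyToHostInBackground() (used in forwardLayer for
            // the outputs listed in cudaD2HBackgroundTransfers) overlap device-to-host copies with kernels
            // that are still running on context.stream.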
2401 cudaInfo = std::unique_ptr<CudaInfo_t>(new CudaInfo_t(std::move(context), std::move(d2h_stream)));
2402 }
2403
2404 cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any
2405
2406 for (auto& layer : layers)
2407 {
2408 auto& ld = layer.second;
2409 if (ld.id == 0)
2410 {
2411 for (auto& wrapper : ld.inputBlobsWrappers)
2412 {
2413 auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
2414 cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
2415 }
2416 }
2417
2418 for (auto& wrapper : ld.outputBlobsWrappers)
2419 {
2420 auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
2421 cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
2422 }
2423 }
2424
2425 for (auto& layer : layers)
2426 {
2427 auto& ld = layer.second;
2428 auto& layerInstance = ld.layerInstance;
2429
2430 if (!layerInstance->supportBackend(DNN_BACKEND_CUDA))
2431 {
2432 std::ostringstream os;
2433 os << "CUDA backend will fallback to the CPU implementation for the layer \"" << ld.name
2434 << "\" of type " << ld.type << '\n';
2435 CV_LOG_INFO(NULL, os.str().c_str());
2436 continue;
2437 }
2438
2439 /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */
2440 auto context = cudaInfo->context;
2441 auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers);
2442 ld.backendNodes[DNN_BACKEND_CUDA] = node;
2443
2444 auto cudaNode = node.dynamicCast<CUDABackendNode>();
2445 cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
2446 }
2447
2448 if (blobsToKeep_.size() > 1)
2449 {
2450 for (const auto& pin : blobsToKeep_)
2451 {
2452 LayerData& ld = layers[pin.lid];
2453 ld.cudaD2HBackgroundTransfers.push_back(pin.oid);
2454 }
2455 }
2456 #endif
2457 }
2458
2459     void allocateLayer(int lid, const LayersShapesMap& layersShapes)
2460 {
2461 CV_TRACE_FUNCTION();
2462
2463 LayerData &ld = layers[lid];
2464
2465 //already allocated
2466 if (ld.flag)
2467 return;
2468
2469 size_t ninputs = ld.inputBlobsId.size();
2470 #if 0
2471 printf("layer %s:", ld.name.c_str());
2472 for (size_t i = 0; i < ninputs; i++)
2473 {
2474 int inp_lid = ld.inputBlobsId[i].lid;
2475 LayerData &inp_ld = layers[inp_lid];
2476 int inp_outputs = (int)inp_ld.outputBlobs.size();
2477 std::cout << " " << inp_ld.name << "(" << inp_outputs;
2478
2479 for( int j = 0; j < inp_outputs; j++ )
2480 {
2481 std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
2482 }
2483 std::cout << ")";
2484 }
2485 printf("\n");
2486 #endif
2487
2488 //determine parent layers
2489 for (size_t i = 0; i < ninputs; i++)
2490 ld.inputLayersId.insert(ld.inputBlobsId[i].lid);
2491
2492 //allocate parents
2493 for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
2494 allocateLayer(*i, layersShapes);
2495
2496 //bind inputs
2497 if (ld.id == 0) // DataLayer
2498 {
2499 ninputs = netInputLayer->inputsData.size();
2500 ld.inputBlobsWrappers.resize(ninputs);
2501 for (size_t i = 0; i < ninputs; i++)
2502 ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
2503 }
2504 else
2505 {
2506 ld.inputBlobs.resize(ninputs);
2507 ld.inputBlobsWrappers.resize(ninputs);
2508 for (size_t i = 0; i < ninputs; i++)
2509 {
2510 LayerPin from = ld.inputBlobsId[i];
2511 CV_Assert(from.valid());
2512 CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
2513 ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
2514 ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
2515 }
2516 }
2517
2518 LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
2519
2520 CV_Assert(layerShapesIt != layersShapes.end());
2521
2522 std::vector<LayerPin> pinsForInternalBlobs;
2523 blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
2524 preferableBackend == DNN_BACKEND_OPENCV &&
2525 preferableTarget == DNN_TARGET_OPENCL_FP16);
2526 ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
2527 for (int i = 0; i < ld.outputBlobs.size(); ++i)
2528 ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
2529
2530 /* CUDA backend has its own system for internal blobs; we don't need these */
2531 ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA) ? 0 : ld.internals.size());
2532 for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i)
2533 ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
2534
2535 Ptr<Layer> layerPtr = ld.getLayerInstance();
2536 {
2537 std::vector<Mat> inps(ld.inputBlobs.size());
2538 for (int i = 0; i < ld.inputBlobs.size(); ++i)
2539 {
2540 inps[i] = *ld.inputBlobs[i];
2541 }
2542 layerPtr->finalize(inps, ld.outputBlobs);
2543 layerPtr->preferableTarget = preferableTarget;
2544 #if 0
2545 std::cout << "\toutputs:";
2546 size_t noutputs = ld.outputBlobs.size();
2547 for (size_t j = 0; j < noutputs; j++)
2548 {
2549 std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
2550 }
2551 std::cout << "\n";
2552 #endif
2553 }
2554
2555         // After allocating the layer, we decrease the reference counters of its input blobs.
2556 blobManager.releaseReferences(ld.inputBlobsId);
2557 blobManager.releaseReferences(pinsForInternalBlobs);
2558
2559 ld.flag = 1;
2560 }
2561
2562 #if 0
2563 #define printf_(args) printf args
2564 #else
2565 #define printf_(args)
2566 #endif
2567
2568     void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
2569 {
2570 CV_TRACE_FUNCTION();
2571
2572 if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
2573 preferableBackend != DNN_BACKEND_CUDA &&
2574 preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
2575 preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
2576 return;
2577
2578         // Scan through all the layers. If there is a convolution layer followed by an activation layer,
2579         // we try to embed the activation into the convolution and disable separate execution of the activation.
2580 std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
2581 blobsToKeep_.end());
2582 MapIdToLayerData::iterator it;
2583 for (it = layers.begin(); it != layers.end(); it++)
2584 {
2585 int lid = it->first;
2586 LayerData& ld = layers[lid];
2587 if( ld.skip )
2588 {
2589 printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2590 continue;
2591 }
2592 printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
2593
2594             // Optimization #1. Try to fuse batch norm, scaling and/or activation layers
2595             // with the current layer if they follow it. Normally, they are fused with the convolution layer,
2596             // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
2597             // some other layers.
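            // A minimal sketch of the intended transformation (layer names are illustrative):
            //
            //   conv -> bn -> scale -> relu   becomes   conv' (bn/scale folded into the weights, relu attached as activation)
            //
            // where the absorbed layers are marked with skip = true and share the fused layer's output blobs.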
2598 Ptr<Layer>& currLayer = ld.layerInstance;
2599 if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
2600 {
2601 LayerData* nextData = &layers[ld.consumers[0].lid];
2602 LayerPin lpNext(ld.consumers[0].lid, 0);
2603 while (nextData)
2604 {
2605                     /* we use the `tryFuse` member of the convolution layer to fuse eltwise later;
2606                      * it is not intended to be fused here, so we stop when we encounter eltwise
2607                      */
2608 if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise")
2609 break;
2610 Ptr<Layer> nextLayer = nextData->layerInstance;
2611 if (currLayer->tryFuse(nextLayer))
2612 {
2613 printf_(("\tfused with %s\n", nextLayer->name.c_str()));
2614 nextData->skip = true;
2615 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2616 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2617 if (nextData->consumers.size() == 1)
2618 {
2619 int nextLayerId = nextData->consumers[0].lid;
2620 nextData = &layers[nextLayerId];
2621 lpNext = LayerPin(nextLayerId, 0);
2622 }
2623 else
2624 {
2625 nextData = 0;
2626 break;
2627 }
2628 }
2629 else
2630 break;
2631 }
2632
2633 if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA)
2634 continue; // Go to the next layer.
2635
2636 // TODO: OpenCL target support more fusion styles.
2637 if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
2638 (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
2639 ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
2640 ld.layerInstance->type != "Concat")) )
2641 continue;
2642
2643 if (preferableBackend == DNN_BACKEND_CUDA && IS_DNN_CUDA_TARGET(preferableTarget)
2644 && ld.layerInstance->type != "Convolution"
2645 && ld.layerInstance->type != "Concat")
2646 continue;
2647
2648 while (nextData)
2649 {
2650                     // For now, the OpenCL target supports fusion only with ReLU/ChannelsPReLU/ReLU6/TanH/Power activations.
2651 if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
2652 nextData->type != "ReLU" &&
2653 nextData->type != "ChannelsPReLU" &&
2654 nextData->type != "ReLU6" &&
2655 nextData->type != "TanH" &&
2656 nextData->type != "Power")
2657 break;
2658
2659 Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2660 if (nextActivLayer.empty())
2661 break;
2662
2663 if (currLayer->setActivation(nextActivLayer))
2664 {
2665 printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
2666 nextData->skip = true;
2667 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
2668 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
2669 if (nextData->consumers.size() == 1)
2670 {
2671 int nextLayerId = nextData->consumers[0].lid;
2672 nextData = &layers[nextLayerId];
2673 lpNext = LayerPin(nextLayerId, 0);
2674 }
2675 else
2676 {
2677 nextData = 0;
2678 break;
2679 }
2680 }
2681 else
2682 break;
2683 }
2684
2685 // OpenCL: fuse convolution layer followed by eltwise + relu
2686 // CUDA: fuse convolution layer followed by eltwise (and optional activation)
2687 while (nextData &&
2688 (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) &&
2689 ld.layerInstance->type == "Convolution"
2690                 ) // this 'while' has 'if' semantics: every path breaks out of the loop
2691 {
2692 Ptr<EltwiseLayer> nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
2693 if (nextEltwiseLayer.empty())
2694 break;
2695
2696 #ifdef HAVE_CUDA
2697 // CUDA backend supports fusion with eltwise sum (without variable channels)
2698 if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty())
2699 {
2700 // we create a temporary backend node for eltwise layer to obtain the eltwise configuration
2701 cuda4dnn::csl::CSLContext context; // assume that initCUDA and EltwiseOp do not use the context during init
2702 const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers);
2703 const auto eltwiseNode = node.dynamicCast<cuda4dnn::EltwiseOpBase>();
2704 // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used.
2705 // Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors.
2706 if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty())
2707 break;
2708 }
2709 #endif
2710
2711 if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0)
2712 break;
2713 if (nextData->inputBlobsId.size() != 2)
2714 break;
2715
2716 if (IS_DNN_OPENCL_TARGET(preferableTarget))
2717 {
2718 if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum")
2719 {
2720 if (nextData->params.has("coeff"))
2721 {
2722 DictValue paramCoeff = nextData->params.get("coeff");
2723 int n = paramCoeff.size();
2724 bool isCoeffOneOne = (n == 2);
2725 for (int i = 0; isCoeffOneOne && i < n; i++)
2726 {
2727 float c = paramCoeff.get<float>(i);
2728 isCoeffOneOne &= (c == 1.0f);
2729 }
2730 if (!isCoeffOneOne)
2731 {
2732 CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only");
2733 break;
2734 }
2735 }
2736 }
2737 else
2738 {
2739 CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation"));
2740 break;
2741 }
2742 }
2743
2744 {
2745 LayerData *eltwiseData = nextData;
2746
2747                     // The Eltwise layer has two inputs. We need to determine which one
2748                     // is the base convolution layer and which one could be used as its bias.
2749 LayerData* biasLayerData = 0;
2750 for (int i = 0; i < 2; ++i)
2751 {
2752 LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
2753 CV_Assert(downLayerData);
2754 while (downLayerData->skip)
2755 {
2756 if (downLayerData->inputBlobsId.size() == 1)
2757 downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
2758 else
2759 {
2760 downLayerData = 0;
2761 break;
2762 }
2763 }
2764 if (downLayerData && ld.id == downLayerData->id)
2765 {
2766 biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
2767 break;
2768 }
2769 }
2770 CV_Assert(biasLayerData);
2771 {
2772 // fuse eltwise + activation layer
2773 // bias must already be computed to fuse => bias layer must appear before convolution
2774 if (biasLayerData->id < ld.id)
2775 {
2776 /* we can fuse activation if:
2777 * => activation layer that follows is the only consumer of eltwise output
2778 * => activation layer does not process multiple inputs
2779 * => we do not require to keep the output of eltwise
2780 */
2781 Ptr<ActivationLayer> nextFusabeleActivLayer;
2782 if (eltwiseData->consumers.size() == 1 && pinsToKeep.count(lpNext) == 0)
2783 {
2784 nextData = &layers[eltwiseData->consumers[0].lid];
2785 lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
2786 CV_Assert(nextData);
2787 if (nextData->outputBlobs.size() == 1)
2788 nextFusabeleActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
2789 }
2790 else
2791 {
2792 // OCL backend cannot fuse in this case but the CUDA backend can continue with just eltwise
2793 nextData = 0;
2794 }
2795
2796 // the requirements of OCV OpenCL backend and CUDA backend are different
2797 // we need to check them separately; hence, the fuse variables
2798 bool fuse_eltwise = false, fuse_activation = false;
2799
2800 Ptr<PowerLayer> activ_power;
2801 if (IS_DNN_OPENCL_TARGET(preferableTarget) && !nextFusabeleActivLayer.empty() &&
2802 nextData &&
2803 (!nextData->type.compare("ReLU") ||
2804 !nextData->type.compare("ChannelsPReLU") ||
2805 (!nextData->type.compare("Power") && (activ_power = nextFusabeleActivLayer.dynamicCast<PowerLayer>()) && activ_power->scale == 1.0f)
2806 ) &&
2807 currLayer->setActivation(nextFusabeleActivLayer))
2808 {
2809 fuse_eltwise = true;
2810 fuse_activation = true;
2811 }
2812
2813 if (IS_DNN_CUDA_TARGET(preferableTarget))
2814 {
2815 /* supported fusion options:
2816 * => convolution + eltwise
2817 * => activation(convolution) + eltwise
2818 * > convolution + activation would have been fused already; we have to fuse eltwise
2819 * => activation(convolution + eltwise)
2820 * > fuse eltwise and then activation
2821 */
2822 auto layer = nextEltwiseLayer.staticCast<Layer>();
2823 if (currLayer->tryFuse(layer))
2824 {
2825 fuse_eltwise = true; /* eltwise was successfully fused */
2826 if (!nextFusabeleActivLayer.empty() && nextData)
2827 {
2828 if ((!nextData->type.compare("ReLU") ||
2829 !nextData->type.compare("ReLU6") ||
2830 !nextData->type.compare("Power") ||
2831 !nextData->type.compare("TanH") ||
2832 !nextData->type.compare("Sigmoid") ||
2833 !nextData->type.compare("Swish") ||
2834 !nextData->type.compare("Mish")) &&
2835 currLayer->setActivation(nextFusabeleActivLayer))
2836 {
2837 // activation was fused
2838 fuse_activation = true;
2839 }
2840 }
2841 }
2842 }
2843
2844 CV_Assert(!fuse_activation || fuse_eltwise); /* cannot fuse activation without eltwise */
2845 if(fuse_eltwise && fuse_activation)
2846 {
2847 CV_Assert(nextData);
2848 CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
2849 ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
2850 printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
2851 printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str()));
2852 eltwiseData->skip = true;
2853 nextData->skip = true;
2854                                 // This optimization is for cases like
2855 // some_layer conv
2856 // | |
2857 // +-- eltwise --+
2858 // |
2859 // activ
2860 // This way all the element-wise computations
2861 // (i.e. some_layer+conv or some_layer*conv)
2862 // would be done at [conv] layer. So we need to
2863 // replace [conv]'s output blob to [eltwise]'s one
2864 // considering that [activ] is an in-place layer.
2865 // Also we need to move all the consumers' references.
2866 // To prevent memory collisions (i.e. when input of
2867 // [conv] and output of [eltwise] is the same blob)
2868 // we allocate a new blob.
2869 CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
2870 ld.outputBlobs[0] = ld.outputBlobs[0].clone();
2871 ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
2872
2873 eltwiseData->outputBlobs = ld.outputBlobs;
2874 nextData->outputBlobs = ld.outputBlobs;
2875 eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
2876 nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
2877
2878 // Move references of [activ] layer consumers to the newly allocated blob.
2879 for (int i = 0; i < nextData->consumers.size(); ++i)
2880 {
2881 LayerData& consumer = layers[nextData->consumers[i].lid];
2882 for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
2883 {
2884 if (consumer.inputBlobsId[j].lid == lpNext.lid)
2885 {
2886 consumer.inputBlobs[j] = &ld.outputBlobs[0];
2887 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
2888 break;
2889 }
2890 }
2891 }
2892 }
2893 else if (fuse_eltwise) // conv + eltwise (note: conv could have fused activations before eltwise)
2894 {
2895 CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
2896 CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
2897 ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
2898 printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
2899 eltwiseData->skip = true;
2900 // This optimization is for cases like
2901 // some_layer conv (maybe fused with activ)
2902 // | |
2903 // +-- eltwise --+
2904 //
2905 // This way all the element-wise computations
2906 // (i.e. some_layer+conv or some_layer*conv)
2907 // would be done at [conv] layer. So we need to
2908 // replace [conv]'s output blob to [eltwise]'s one.
2909 // Also we need to move all the consumers' references.
2910 // To prevent memory collisions (i.e. when input of
2911 // [conv] and output of [eltwise] is the same blob)
2912 // we allocate a new blob.
2913 CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
2914 ld.outputBlobs[0] = ld.outputBlobs[0].clone();
2915 ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
2916
2917 eltwiseData->outputBlobs = ld.outputBlobs;
2918 eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
2919
2920 // Move references of [eltwise] layer consumers to the newly allocated blob.
2921 for (int i = 0; i < eltwiseData->consumers.size(); ++i)
2922 {
2923 LayerData& consumer = layers[eltwiseData->consumers[i].lid];
2924 for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
2925 {
2926 if (consumer.inputBlobsId[j].lid == eltwiseData->id)
2927 {
2928 consumer.inputBlobs[j] = &ld.outputBlobs[0];
2929 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
2930 break;
2931 }
2932 }
2933 }
2934 }
2935 }
2936 }
2937 }
2938
2939 break;
2940 }
2941 }
2942
2943 if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA)
2944 continue; // Go to the next layer.
2945
2946             // Optimization #2. If there is a concat layer that concatenates channels
2947             // from the inputs together (i.e. axis == 1), then we make the inputs of
2948             // the concat layer write directly into the concatenation output buffer
2949             // (and so we eliminate the concatenation layer, because the channels
2950             // are concatenated implicitly).
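            // Sketch of the effect (shapes are illustrative): for a Concat output of shape [1, C1+C2, H, W]
            //
            //   input #1 (C1 channels) writes into output(:, 0:C1,     :, :)
            //   input #2 (C2 channels) writes into output(:, C1:C1+C2, :, :)
            //
            // so the Concat layer itself can be marked as skipped.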
2951 Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
2952 if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 )
2953 {
2954 Mat& output = ld.outputBlobs[0];
2955 UMat umat_output;
2956 #ifdef HAVE_OPENCL
2957 if (!ld.outputBlobsWrappers.empty() &&
2958 (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
2959 {
2960 size_t i, ninputs = ld.inputBlobsId.size();
2961 bool conv_layer = true;
2962 for( i = 0; i < ninputs; i++ )
2963 {
2964 LayerPin pin = ld.inputBlobsId[i];
2965 LayerData* inp_i_data = &layers[pin.lid];
2966 while(inp_i_data->skip &&
2967 inp_i_data->inputBlobsId.size() == 1 &&
2968 inp_i_data->consumers.size() == 1)
2969 {
2970 pin = inp_i_data->inputBlobsId[0];
2971 inp_i_data = &layers[pin.lid];
2972 }
2973 conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
2974 }
2975 if (!conv_layer)
2976 continue;
2977 std::vector<UMat> umat_outputBlobs;
2978 umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2979 umat_output = umat_outputBlobs[0];
2980 }
2981 #endif
2982
2983 // TODO: in general, this optimization can always be done, but
2984 // many layers currently check that the input/output blobs are
2985 // continuous arrays. Unfortunately, this is not true when
2986 // the concatenation optimization is applied with batch_size > 1.
2987 // so, for now, we only apply this optimization in the most popular
2988 // case batch_size == 1.
2989 int axis = normalize_axis(concatLayer->axis, output.dims);
2990 if( output.total(0, axis) == 1 )
2991 {
2992 size_t i, ninputs = ld.inputBlobsId.size();
2993 std::vector<LayerPin> realinputs(ninputs);
2994 for( i = 0; i < ninputs; i++ )
2995 {
2996 LayerPin pin = ld.inputBlobsId[i];
2997 LayerData* inp_i_data = &layers[pin.lid];
2998 while(inp_i_data->skip &&
2999 inp_i_data->inputBlobsId.size() == 1 &&
3000 inp_i_data->consumers.size() == 1)
3001 {
3002 pin = inp_i_data->inputBlobsId[0];
3003 inp_i_data = &layers[pin.lid];
3004 }
3005 printf_(("\treal input for %s is %s\n",
3006 layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
3007 inp_i_data->getLayerInstance()->name.c_str()));
3008
3009 if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
3010 break;
3011 #ifdef HAVE_CUDA
3012 if (preferableBackend == DNN_BACKEND_CUDA &&
3013 (inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false ||
3014 (inp_i_data->layerInstance->type != "Convolution" &&
3015 inp_i_data->layerInstance->type != "Pooling" &&
3016 inp_i_data->layerInstance->type != "Resize" &&
3017 inp_i_data->layerInstance->type != "Flatten" &&
3018 inp_i_data->layerInstance->type != "Permute" &&
3019 inp_i_data->layerInstance->type != "Reorg" &&
3020 inp_i_data->layerInstance->type != "Eltwise" &&
3021 inp_i_data->layerInstance.dynamicCast<ActivationLayer>().empty())))
3022 {
3023 break;
3024 }
3025 #endif
3026 realinputs[i] = pin;
3027 }
3028
3029 if( i >= ninputs )
3030 {
3031                     // Allocate new memory to prevent collisions during memory
3032                     // reuse (see https://github.com/opencv/opencv/pull/10456).
3033 output = output.clone();
3034 #ifdef HAVE_OPENCL
3035 if (preferableBackend == DNN_BACKEND_OPENCV &&
3036 IS_DNN_OPENCL_TARGET(preferableTarget))
3037 {
3038 std::vector<UMat> umats(1);
3039 umat_output = umat_output.clone();
3040 umats[0] = umat_output;
3041 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
3042 }
3043 #endif
3044
3045 #ifdef HAVE_CUDA
3046 if (preferableBackend == DNN_BACKEND_CUDA)
3047 ld.outputBlobsWrappers[0] = wrap(output);
3048 #endif
3049 std::vector<Range> chrange(output.dims, Range::all());
3050 int ofs = 0;
3051 for( i = 0; i < ninputs; i++ )
3052 {
3053 LayerPin pin = realinputs[i];
3054 LayerData* inp_i_data = &layers[pin.lid];
3055 int channels_i = ld.inputBlobs[i]->size[axis];
3056 chrange[axis] = Range(ofs, ofs + channels_i);
3057 printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
3058 pin.oid, ofs, ofs + channels_i));
3059 ofs += channels_i;
3060 Mat output_slice = output(chrange);
3061 Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
3062 CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
3063 Mat* oldPtr = &curr_output;
3064 curr_output = output_slice;
3065 #ifdef HAVE_OPENCL
3066 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
3067 {
3068 std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
3069 umats[pin.oid] = umat_output(chrange);
3070 OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
3071 }
3072 #endif
3073 #ifdef HAVE_CUDA
3074 if (preferableBackend == DNN_BACKEND_CUDA)
3075 {
3076 auto cuda_wrapper = wrap(output).dynamicCast<CUDABackendWrapper>();
3077 auto offset = chrange[axis].start * output_slice.total(axis + 1, output.dims);
3078 auto new_shape = shape(output_slice);
3079 cuda_wrapper->update(new_shape, offset);
3080 inp_i_data->outputBlobsWrappers[pin.oid] = cuda_wrapper.staticCast<BackendWrapper>();
3081 }
3082 #endif
3083                         // Layers that referred to the old input Mat will now refer to the
3084                         // new data through the same Mat object.
3085 CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
3086 }
3087
3088 #ifdef HAVE_CUDA
3089 if (preferableBackend == DNN_BACKEND_CUDA)
3090 {
3091 for (int i = 0; i < ld.consumers.size(); i++)
3092 {
3093 LayerData& consumer = layers[ld.consumers[i].lid];
3094 for (int j = 0; j < consumer.inputBlobsId.size(); j++)
3095 {
3096 if (consumer.inputBlobsId[j].lid == ld.id)
3097 {
3098 CV_Assert(consumer.inputBlobs[j]->data == ld.outputBlobs[0].data);
3099 consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
3100 break;
3101 }
3102 }
3103 }
3104 }
3105 #endif
3106 ld.skip = true;
3107 printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
3108 }
3109 }
3110 }
3111 }
3112 }
3113
3114     void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
3115 {
3116 CV_TRACE_FUNCTION();
3117
3118 MapIdToLayerData::iterator it;
3119 for (it = layers.begin(); it != layers.end(); it++)
3120 it->second.flag = 0;
3121
3122 CV_Assert(!layers[0].outputBlobs.empty());
3123 ShapesVec inputShapes;
3124 for(int i = 0; i < layers[0].outputBlobs.size(); i++)
3125 {
3126 Mat& inp = layers[0].outputBlobs[i];
3127 CV_Assert(inp.total());
3128 if (preferableBackend == DNN_BACKEND_OPENCV &&
3129 preferableTarget == DNN_TARGET_OPENCL_FP16)
3130 {
3131 layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
3132 }
3133 inputShapes.push_back(shape(inp));
3134 }
3135 LayersShapesMap layersShapes;
3136 getLayersShapes(inputShapes, layersShapes);
3137
3138 blobManager.reset();
3139 backendWrappers.clear();
3140
3141 for(auto& layer : layers)
3142 {
3143 auto& ld = layer.second;
3144 ld.inputBlobsWrappers.clear();
3145 ld.outputBlobsWrappers.clear();
3146 ld.internalBlobsWrappers.clear();
3147 }
3148
3149 // Fake references to input blobs.
3150 for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
3151 blobManager.addReference(LayerPin(0, i));
3152 for (it = layers.begin(); it != layers.end(); ++it)
3153 {
3154 const LayerData& ld = it->second;
3155 blobManager.addReferences(ld.inputBlobsId);
3156 }
3157
3158 for (int i = 0; i < blobsToKeep_.size(); i++)
3159 {
3160 blobManager.addReference(blobsToKeep_[i]);
3161 }
3162
3163 for (it = layers.begin(); it != layers.end(); it++)
3164 {
3165 int lid = it->first;
3166 allocateLayer(lid, layersShapes);
3167 }
3168
3169 layersTimings.resize(lastLayerId + 1, 0);
3170 fuseLayers(blobsToKeep_);
3171 }
3172
3173     void forwardLayer(LayerData &ld)
3174 {
3175 CV_TRACE_FUNCTION();
3176
3177 Ptr<Layer> layer = ld.layerInstance;
3178
3179 if( !ld.skip )
3180 {
3181 TickMeter tm;
3182 tm.start();
3183
3184 std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
3185 if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
3186 {
3187 if (isAsync)
3188 CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode");
3189
3190 if (!layer->supportBackend(DNN_BACKEND_OPENCV))
3191 CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" unsupported on OpenCV backend",
3192 ld.name.c_str(), ld.type.c_str()));
3193
3194 #ifdef HAVE_OPENCL
3195 if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
3196 {
3197 std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
3198 std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
3199 std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
3200 layer->forward(umat_inputBlobs,
3201 umat_outputBlobs,
3202 umat_internalBlobs);
3203 if (DNN_CHECK_NAN_INF)
3204 {
3205 bool fail = false;
3206 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
3207 {
3208 UMat& u = umat_outputBlobs[i];
3209 Mat m;
3210 if (u.depth() == CV_16S) // FP16
3211 convertFp16(u, m);
3212 else
3213 m = u.getMat(ACCESS_READ);
3214 if (!checkRange(m))
3215 {
3216 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3217 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3218 fail = true;
3219 }
3220 else if (!checkRange(m, true, NULL, -1e6, 1e6))
3221 {
3222 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3223 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3224 fail = true;
3225 }
3226 }
3227 if (fail)
3228 {
3229 for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
3230 {
3231 UMat& u = umat_inputBlobs[i];
3232 Mat m;
3233 if (u.depth() == CV_16S) // FP16
3234 convertFp16(u, m);
3235 else
3236 m = u.getMat(ACCESS_READ);
3237 std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
3238 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3239 }
3240 for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
3241 {
3242 UMat& u = umat_outputBlobs[i];
3243 Mat m;
3244 if (u.depth() == CV_16S) // FP16
3245 convertFp16(u, m);
3246 else
3247 m = u.getMat(ACCESS_READ);
3248 std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
3249 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3250 }
3251 for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
3252 {
3253 UMat& u = umat_internalBlobs[i];
3254 Mat m;
3255 if (u.depth() == CV_16S) // FP16
3256 convertFp16(u, m);
3257 else
3258 m = u.getMat(ACCESS_READ);
3259 std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
3260 if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
3261 }
3262 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
3263 CV_Assert(!fail);
3264 }
3265 }
3266 OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
3267 }
3268 else
3269 #endif
3270 {
3271 for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
3272 {
3273 if (!ld.inputBlobsWrappers[i].empty())
3274 ld.inputBlobsWrappers[i]->copyToHost();
3275 }
3276
3277 std::vector<Mat> inps(ld.inputBlobs.size());
3278 for (int i = 0; i < ld.inputBlobs.size(); ++i)
3279 {
3280 inps[i] = *ld.inputBlobs[i];
3281 }
3282 layer->forward(inps, ld.outputBlobs, ld.internals);
3283
3284 if (DNN_CHECK_NAN_INF)
3285 {
3286 bool fail = false;
3287 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
3288 {
3289 const Mat& m = ld.outputBlobs[i];
3290 if (!checkRange(m))
3291 {
3292 std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3293 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3294 fail = true;
3295 }
3296 else if (!checkRange(m, true, NULL, -1e6, 1e6))
3297 {
3298 std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
3299 std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
3300 fail = true;
3301 }
3302 }
3303 if (fail)
3304 {
3305 for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
3306 {
3307 const Mat* pM = ld.inputBlobs[i];
3308 if (!pM)
3309 {
3310 std::cout << "INPUT " << i << " is NULL" << std::endl;
3311 continue;
3312 }
3313 const Mat& m = *pM;
3314 std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
3315 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3316 }
3317 for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
3318 {
3319 const Mat& m = ld.outputBlobs[i];
3320 std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
3321 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3322 }
3323 for (size_t i = 0; i < ld.internals.size(); ++i)
3324 {
3325 const Mat& m = ld.internals[i];
3326 std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
3327 if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
3328 }
3329 if (DNN_CHECK_NAN_INF_RAISE_ERROR)
3330 CV_Assert(!fail);
3331 }
3332 }
3333
3334 for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
3335 {
3336 if (!ld.outputBlobsWrappers[i].empty())
3337 ld.outputBlobsWrappers[i]->setHostDirty();
3338 }
3339 }
3340 }
3341 else
3342 {
3343 Ptr<BackendNode> node = it->second;
3344 CV_Assert(!node.empty());
3345 if (preferableBackend == DNN_BACKEND_CUDA)
3346 {
3347 CV_Assert(haveCUDA());
3348
3349 #ifdef HAVE_CUDA
3350 Ptr<CUDABackendNode> cudaNode = node.dynamicCast<CUDABackendNode>();
3351 CV_Assert(!cudaNode.empty());
3352
3353 cudaNode->forward(ld.inputBlobsWrappers, ld.outputBlobsWrappers, cudaInfo->workspace);
3354
3355 for (auto id : ld.cudaD2HBackgroundTransfers)
3356 {
3357 auto wrapper = ld.outputBlobsWrappers[id].dynamicCast<CUDABackendWrapper>();
3358 wrapper->copyToHostInBackground();
3359 }
3360 #endif
3361 }
3362 else if (preferableBackend == DNN_BACKEND_HALIDE)
3363 {
3364 forwardHalide(ld.outputBlobsWrappers, node);
3365 }
3366 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
3367 {
3368 forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
3369 }
3370 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
3371 {
3372 forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
3373 }
3374 else if (preferableBackend == DNN_BACKEND_VKCOM)
3375 {
3376 try
3377 {
3378 forwardVkCom(ld.outputBlobsWrappers, node);
3379 }
3380 catch (const cv::Exception& e)
3381 {
3382 CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what());
3383 it->second = Ptr<BackendNode>();
3384 forwardLayer(ld);
3385 }
3386 }
3387 else
3388 {
3389 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
3390 }
3391 }
3392
3393 tm.stop();
3394 int64 t = tm.getTimeTicks();
3395 layersTimings[ld.id] = (t > 0) ? t : t + 1; // zero for skipped layers only
3396 }
3397 else
3398 {
3399 layersTimings[ld.id] = 0;
3400 }
3401
3402 ld.flag = 1;
3403 }
3404
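    // Forwards, in id order, every not-yet-processed layer that precedes 'ld',
    // then forwards 'ld' itself. When clearFlags is true the per-layer flags are
    // reset first; with the CUDA backend the stream is synchronized at the end.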
3405     void forwardToLayer(LayerData &ld, bool clearFlags = true)
3406 {
3407 CV_TRACE_FUNCTION();
3408
3409 if (clearFlags)
3410 {
3411 MapIdToLayerData::iterator it;
3412 for (it = layers.begin(); it != layers.end(); it++)
3413 it->second.flag = 0;
3414 }
3415
3416         // already forwarded
3417 if (ld.flag)
3418 return;
3419
3420 //forward parents
3421 MapIdToLayerData::iterator it;
3422 for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
3423 {
3424 LayerData &ld = it->second;
3425 if (ld.flag)
3426 continue;
3427 forwardLayer(ld);
3428 }
3429
3430 //forward itself
3431 forwardLayer(ld);
3432
3433 #ifdef HAVE_CUDA
3434 if (preferableBackend == DNN_BACKEND_CUDA)
3435 cudaInfo->context.stream.synchronize();
3436 #endif
3437 }
3438
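    // Computes input/output/internal shapes for layer 'id', recursing into producer
    // layers whose output shapes are not known yet. Layer 0 (the network input layer)
    // is seeded either from already set input blobs or from user-provided input shapes.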
3439     void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
3440 {
3441 std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
3442
3443 if (id == 0 && inOutShapes[id].in[0].empty())
3444 {
3445 if (!layers[0].outputBlobs.empty())
3446 {
3447 ShapesVec shapes;
3448 for (int i = 0; i < layers[0].outputBlobs.size(); i++)
3449 {
3450 Mat& inp = layers[0].outputBlobs[i];
3451 CV_Assert(inp.total());
3452 shapes.push_back(shape(inp));
3453 }
3454 inOutShapes[0].in = shapes;
3455 }
3456 else
3457 {
3458 const std::vector<MatShape>& inputShapes = netInputLayer->shapes;
3459 bool none = true;
3460 for (size_t i = 0; i < inputShapes.size(); i++)
3461 {
3462 if (!inputShapes[i].empty())
3463 {
3464 none = false;
3465 break;
3466 }
3467 }
3468 if (none)
3469 {
3470 inOutShapes[0].out.clear();
3471 return;
3472 }
3473 else
3474 {
3475 inOutShapes[0].in = inputShapes;
3476 }
3477 }
3478 }
3479
3480 if (inOutShapes[id].in.empty())
3481 {
3482 for(int i = 0; i < inputLayerIds.size(); i++)
3483 {
3484 int layerId = inputLayerIds[i].lid;
3485 LayersShapesMap::iterator it =
3486 inOutShapes.find(layerId);
3487 if(it == inOutShapes.end() ||
3488 it->second.out.empty())
3489 {
3490 getLayerShapesRecursively(layerId, inOutShapes);
3491 }
3492 const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
3493 inOutShapes[id].in.push_back(shape);
3494 }
3495 }
3496 const ShapesVec& is = inOutShapes[id].in;
3497 ShapesVec& os = inOutShapes[id].out;
3498 ShapesVec& ints = inOutShapes[id].internal;
3499 int requiredOutputs = layers[id].requiredOutputs.size();
3500 Ptr<Layer> l = layers[id].getLayerInstance();
3501 CV_Assert(l);
3502 bool layerSupportInPlace = false;
3503 try
3504 {
3505 layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints);
3506 }
3507 catch (const cv::Exception& e)
3508 {
3509 CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." <<
3510 " inputs=" << is.size() <<
3511 " outputs=" << os.size() << "/" << requiredOutputs <<
3512 " blobs=" << l->blobs.size());
3513 for (size_t i = 0; i < is.size(); ++i)
3514 {
3515 CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i]));
3516 }
3517 for (size_t i = 0; i < os.size(); ++i)
3518 {
3519 CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i]));
3520 }
3521 for (size_t i = 0; i < l->blobs.size(); ++i)
3522 {
3523 CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
3524 }
3525 CV_LOG_ERROR(NULL, "Exception message: " << e.what());
3526 throw;
3527 }
3528 inOutShapes[id].supportInPlace = layerSupportInPlace;
3529
3530 for (int i = 0; i < ints.size(); i++)
3531 CV_Assert(total(ints[i]) > 0);
3532
3533 for (int i = 0; i < os.size(); i++)
3534 CV_Assert(total(os[i]) > 0);
3535 }
3536
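    // Infers shapes for every layer of the network given the network input shapes.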
3537     void getLayersShapes(const ShapesVec& netInputShapes,
3538 LayersShapesMap& inOutShapes)
3539 {
3540 inOutShapes.clear();
3541
3542 inOutShapes[0].in = netInputShapes; //insert shape for first input layer
3543 for (MapIdToLayerData::iterator it = layers.begin();
3544 it != layers.end(); it++)
3545 {
3546 getLayerShapesRecursively(it->first, inOutShapes);
3547 }
3548 }
3549
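    // Infers the shapes of a single layer given the network input shapes.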
3550     void getLayerShapes(const ShapesVec& netInputShapes,
3551 const int layerId,
3552 LayerShapes& shapes)
3553 {
3554 LayersShapesMap inOutShapes;
3555 inOutShapes[0].in = netInputShapes; //insert shape for first input layer
3556 getLayerShapesRecursively(layerId, inOutShapes);
3557 shapes = inOutShapes[layerId];
3558 }
3559
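    // Re-propagates the current input blob shapes through all layers (via
    // updateMemoryShapes); used for networks with dynamic shapes when an input
    // blob is replaced by one of a different size.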
3560     void updateLayersShapes()
3561 {
3562 CV_Assert(!layers[0].outputBlobs.empty());
3563 ShapesVec inputShapes;
3564 for(int i = 0; i < layers[0].outputBlobs.size(); i++)
3565 {
3566 Mat& inp = layers[0].outputBlobs[i];
3567 CV_Assert(inp.total());
3568 if (preferableBackend == DNN_BACKEND_OPENCV &&
3569 preferableTarget == DNN_TARGET_OPENCL_FP16)
3570 {
3571 layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
3572 }
3573 inputShapes.push_back(shape(inp));
3574 }
3575 LayersShapesMap layersShapes;
3576 layersShapes[0].in = inputShapes;
3577 for (MapIdToLayerData::iterator it = layers.begin();
3578 it != layers.end(); it++)
3579 {
3580 int layerId = it->first;
3581 std::vector<LayerPin>& inputLayerIds = it->second.inputBlobsId;
3582 if (layersShapes[layerId].in.empty())
3583 {
3584 for(int i = 0; i < inputLayerIds.size(); i++)
3585 {
3586 int inputLayerId = inputLayerIds[i].lid;
3587 LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId);
3588 if(inputIt == layersShapes.end() || inputIt->second.out.empty())
3589 {
3590 getLayerShapesRecursively(inputLayerId, layersShapes);
3591 }
3592 const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid];
3593 layersShapes[layerId].in.push_back(shape);
3594 }
3595 it->second.layerInstance->updateMemoryShapes(layersShapes[layerId].in);
3596 }
3597 }
3598 }
3599
3600     LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
3601 {
3602 return *std::max_element(pins.begin(), pins.end());
3603 }
3604
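    // Returns the output blob addressed by 'pin'. For non-CPU targets the data is
    // first copied back to the host; FP16 (CV_16S) blobs are converted to FP32.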
3605     Mat getBlob(const LayerPin& pin)
3606 {
3607 CV_TRACE_FUNCTION();
3608
3609 if (!pin.valid())
3610 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3611
3612 LayerData &ld = layers[pin.lid];
3613 if ((size_t)pin.oid >= ld.outputBlobs.size())
3614 {
3615             CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %zu outputs, "
3616                                            "but output #%d was requested", ld.name.c_str(),
3617 ld.outputBlobs.size(), pin.oid));
3618 }
3619 if (preferableTarget != DNN_TARGET_CPU)
3620 {
3621 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3622             // Transfer data to CPU if it's required.
3623 ld.outputBlobsWrappers[pin.oid]->copyToHost();
3624 }
3625
3626 if (ld.outputBlobs[pin.oid].depth() == CV_16S)
3627 {
3628 convertFp16(ld.outputBlobs[pin.oid], output_blob);
3629 return output_blob;
3630 }
3631 else
3632 return ld.outputBlobs[pin.oid];
3633 }
3634
3635     Mat getBlob(String outputName)
3636 {
3637 return getBlob(getPinByAlias(outputName));
3638 }
3639
3640 #ifdef CV_CXX11
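    // Returns an AsyncArray future for the output addressed by 'pin'. Only the
    // Inference Engine backends (NN Builder 2019 / nGraph) provide asynchronous results.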
3641     AsyncArray getBlobAsync(const LayerPin& pin)
3642 {
3643 CV_TRACE_FUNCTION();
3644 #ifdef HAVE_INF_ENGINE
3645 if (!pin.valid())
3646 CV_Error(Error::StsObjectNotFound, "Requested blob not found");
3647
3648 LayerData &ld = layers[pin.lid];
3649 if ((size_t)pin.oid >= ld.outputBlobs.size())
3650 {
3651             CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %d outputs, "
3652                                            "but output #%d was requested", ld.name.c_str(),
3653 (int)ld.outputBlobs.size(), (int)pin.oid));
3654 }
3655 if (preferableTarget != DNN_TARGET_CPU)
3656 {
3657 CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
3658             // Transfer data to CPU if it's required.
3659 ld.outputBlobsWrappers[pin.oid]->copyToHost();
3660 }
3661 CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
3662
3663 if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) {
3664 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3665 Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
3666 return std::move(wrapper->futureMat);
3667 #else
3668 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3669 #endif
3670 }
3671 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
3672 {
3673 #ifdef HAVE_DNN_NGRAPH
3674 Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
3675 return std::move(wrapper->futureMat);
3676 #else
3677 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
3678 #endif
3679 }
3680 #endif // HAVE_INF_ENGINE
3681 CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 backend is required");
3682 }
3683
3684     AsyncArray getBlobAsync(String outputName)
3685 {
3686 return getBlobAsync(getPinByAlias(outputName));
3687 }
3688 #endif // CV_CXX11
3689
3690 #ifdef HAVE_INF_ENGINE
3691 static
3692 Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
3693 #endif
3694
3695 string dump();
3696
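    // Writes the graphviz dump produced by dump() to '<base>.dot'; failures are
    // recorded into a companion '.error' file instead of being rethrown.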
3697     void dumpNetworkToFile()
3698 {
3699 #ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
3700 string dumpFileNameBase = getDumpFileNameBase();
3701 string dumpFileName = dumpFileNameBase + ".dot";
3702 try
3703 {
3704 string dumpStr = dump();
3705 std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary);
3706 out << dumpStr;
3707 }
3708 catch (const std::exception& e)
3709 {
3710 std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3711 out << "Exception: " << e.what() << std::endl;
3712 }
3713 catch (...)
3714 {
3715 std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
3716 out << "Can't dump: unknown exception" << std::endl;
3717 }
3718 #endif
3719 }
3720 };
3721
3722 Net::Net() : impl(new Net::Impl)
3723 {
3724 }
3725
3726 #ifdef HAVE_INF_ENGINE
3727 /*static*/
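// Wraps an already parsed IE CNNNetwork into a cv::dnn::Net: the network inputs are
// registered first, then one proxy output layer is added per IE output and bound to a
// single backend node that executes the whole IE network.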
3728 Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
3729 {
3730 CV_TRACE_FUNCTION();
3731
3732 CV_TRACE_REGION("register_inputs");
3733
3734 std::vector<String> inputsNames;
3735 std::vector<MatShape> inp_shapes;
3736 for (auto& it : ieNet.getInputsInfo())
3737 {
3738 inputsNames.push_back(it.first);
3739 std::vector<size_t> dims = it.second->getTensorDesc().getDims();
3740 inp_shapes.push_back(std::vector<int>(dims.begin(), dims.end()));
3741 }
3742
3743 Net cvNet;
3744 cvNet.setInputsNames(inputsNames);
3745
3746 // set empty input to determine input shapes
3747 for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id)
3748 {
3749 cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]);
3750 }
3751
3752 CV_TRACE_REGION_NEXT("backendNode");
3753
3754 Ptr<BackendNode> backendNode;
3755 #ifdef HAVE_DNN_NGRAPH
3756 if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3757 {
3758 auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{});
3759 Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
3760 backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
3761 backendNode = backendNodeNGraph;
3762 }
3763 else
3764 #endif
3765 {
3766 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3767 Ptr<InfEngineBackendNode> backendNodeNN(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
3768 backendNodeNN->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
3769 backendNode = backendNodeNN;
3770 #else
3771 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3772 #endif
3773 }
3774
3775 CV_TRACE_REGION_NEXT("register_outputs");
3776
3777 #ifdef HAVE_DNN_NGRAPH
3778 auto ngraphFunction = ieNet.getFunction();
3779 #if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
3780 std::list< std::shared_ptr<ngraph::Node> > ngraphOperations;
3781 #else
3782 std::vector< std::shared_ptr<ngraph::Node> > ngraphOperations;
3783 #endif
3784 if (ngraphFunction)
3785 {
3786 ngraphOperations = ngraphFunction->get_ops();
3787 }
3788 #endif
3789
3790 for (auto& it : ieNet.getOutputsInfo())
3791 {
3792 CV_TRACE_REGION("output");
3793 const auto& outputName = it.first;
3794
3795 LayerParams lp;
3796 int lid = cvNet.addLayer(it.first, "", lp);
3797
3798 LayerData& ld = cvNet.impl->layers[lid];
3799
3800 #ifdef HAVE_DNN_NGRAPH
3801 if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
3802 {
3803 Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
3804 cvLayer->name = outputName;
3805 cvLayer->type = "_unknown_";
3806
3807 auto process_layer = [&](const std::string& name) -> bool
3808 {
3809 if (ngraphFunction)
3810 {
3811 CV_TRACE_REGION("ngraph_function");
3812 for (const auto& op : ngraphOperations)
3813 {
3814 CV_Assert(op);
3815 if (op->get_friendly_name() == name)
3816 {
3817 const std::string typeName = op->get_type_info().name;
3818 cvLayer->type = typeName;
3819 return true;
3820 }
3821 }
3822 return false;
3823 }
3824 else
3825 {
3826 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
3827 CV_Error(Error::StsNotImplemented, "This OpenCV version is built with Inference Engine which has dropped IR v7 support");
3828 #else
3829 CV_TRACE_REGION("legacy_cnn_layer");
3830 try
3831 {
3832 InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(name.c_str());
3833 CV_Assert(ieLayer);
3834
3835 cvLayer->type = ieLayer->type;
3836 return true;
3837 }
3838 catch (const std::exception& e)
3839 {
3840 CV_UNUSED(e);
3841 CV_LOG_DEBUG(NULL, "IE layer extraction failure: '" << name << "' - " << e.what());
3842 return false;
3843 }
3844 #endif
3845
3846 }
3847 };
3848
3849 bool found = process_layer(outputName);
3850 if (!found)
3851 {
3852 auto pos = outputName.rfind('.'); // cut port number: ".0"
3853 if (pos != std::string::npos)
3854 {
3855 std::string layerName = outputName.substr(0, pos);
3856 found = process_layer(layerName);
3857 }
3858 }
3859 if (!found)
3860 CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'");
3861
3862 ld.layerInstance = cvLayer;
3863 ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode;
3864 }
3865 else
3866 #endif
3867 {
3868 #ifdef HAVE_DNN_IE_NN_BUILDER_2019
3869 Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
3870
3871 InferenceEngine::CNNLayerPtr ieLayer;
3872 try
3873 {
3874 ieLayer = ieNet.getLayerByName(outputName.c_str());
3875 }
3876 catch (...)
3877 {
3878 auto pos = outputName.rfind('.'); // cut port number: ".0"
3879 if (pos != std::string::npos)
3880 {
3881 std::string layerName = outputName.substr(0, pos);
3882 ieLayer = ieNet.getLayerByName(layerName.c_str());
3883 }
3884 }
3885 CV_Assert(ieLayer);
3886
3887 cvLayer->name = outputName;
3888 cvLayer->type = ieLayer->type;
3889 ld.layerInstance = cvLayer;
3890
3891 ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019] = backendNode;
3892 #else
3893 CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
3894 #endif
3895 }
3896
3897 for (int i = 0; i < inputsNames.size(); ++i)
3898 cvNet.connect(0, i, lid, i);
3899 }
3900
3901 CV_TRACE_REGION_NEXT("finalize");
3902
3903 cvNet.setPreferableBackend(getInferenceEngineBackendTypeParam());
3904
3905 cvNet.impl->skipInfEngineInit = true;
3906 return cvNet;
3907 }
3908 #endif // HAVE_INF_ENGINE
3909
3910 Net Net::readFromModelOptimizer(const String& xml, const String& bin)
3911 {
3912 CV_TRACE_FUNCTION();
3913 #ifndef HAVE_INF_ENGINE
3914 CV_UNUSED(xml); CV_UNUSED(bin);
3915 CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3916 #else
3917 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3918 InferenceEngine::CNNNetReader reader;
3919 reader.ReadNetwork(xml);
3920 reader.ReadWeights(bin);
3921
3922 InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3923 #else
3924 InferenceEngine::Core& ie = getCore("");
3925 InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
3926 #endif
3927
3928 return Impl::createNetworkFromModelOptimizer(ieNet);
3929 #endif // HAVE_INF_ENGINE
3930 }
3931
3932 Net Net::readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights)
3933 {
3934 CV_TRACE_FUNCTION();
3935 CV_Assert(!bufferModelConfig.empty());
3936 CV_Assert(!bufferWeights.empty());
3937 return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(),
3938 bufferWeights.data(), bufferWeights.size());
3939 }
3940
3941 Net Net::readFromModelOptimizer(
3942 const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
3943 const uchar* bufferWeightsPtr, size_t bufferWeightsSize
3944 )
3945 {
3946 CV_TRACE_FUNCTION();
3947 #ifndef HAVE_INF_ENGINE
3948 CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr);
3949     CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferWeightsSize);
3950 CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
3951 #else
3952
3953 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
3954 InferenceEngine::CNNNetReader reader;
3955
3956 try
3957 {
3958 reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize);
3959
3960 InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3961 InferenceEngine::TBlob<uint8_t>::Ptr weightsBlobPtr(new InferenceEngine::TBlob<uint8_t>(tensorDesc));
3962 weightsBlobPtr->allocate();
3963 std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize);
3964 reader.SetWeights(weightsBlobPtr);
3965 }
3966 catch (const std::exception& e)
3967 {
3968 CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3969 }
3970
3971 InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
3972 #else
3973 InferenceEngine::Core& ie = getCore("");
3974
3975 std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize);
3976
3977 InferenceEngine::CNNNetwork ieNet;
3978 try
3979 {
3980 InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
3981 InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob<uint8_t>(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize);
3982
3983 ieNet = ie.ReadNetwork(model, weights_blob);
3984 }
3985 catch (const std::exception& e)
3986 {
3987 CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
3988 }
3989 #endif
3990
3991 return Impl::createNetworkFromModelOptimizer(ieNet);
3992 #endif // HAVE_INF_ENGINE
3993 }
3994
3995
3996 Net::~Net()
3997 {
3998 }
3999
4000 int Net::addLayer(const String &name, const String &type, LayerParams &params)
4001 {
4002 CV_TRACE_FUNCTION();
4003
4004 if (impl->getLayerId(name) >= 0)
4005 {
4006         CV_Error(Error::StsBadArg, "Layer \"" + name + "\" is already in the net");
4007 return -1;
4008 }
4009
4010 int id = ++impl->lastLayerId;
4011 impl->layerNameToId.insert(std::make_pair(name, id));
4012 impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
4013 if (params.get<bool>("has_dynamic_shapes", false))
4014 impl->hasDynamicShapes = true;
4015
4016 return id;
4017 }
4018
4019 int Net::addLayerToPrev(const String &name, const String &type, LayerParams &params)
4020 {
4021 CV_TRACE_FUNCTION();
4022
4023 int prvLid = impl->lastLayerId;
4024 int newLid = this->addLayer(name, type, params);
4025 this->connect(prvLid, 0, newLid, 0);
4026 return newLid;
4027 }
4028
4029 void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
4030 {
4031 CV_TRACE_FUNCTION();
4032
4033 impl->connect(outLayerId, outNum, inpLayerId, inpNum);
4034 }
4035
4036 void Net::connect(String _outPin, String _inPin)
4037 {
4038 CV_TRACE_FUNCTION();
4039
4040 LayerPin outPin = impl->getPinByAlias(_outPin);
4041 LayerPin inpPin = impl->getPinByAlias(_inPin);
4042
4043 CV_Assert(outPin.valid() && inpPin.valid());
4044
4045 impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
4046 }
4047
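// Runs a forward pass up to (and including) the layer named 'outputName' (the last
// layer when the name is empty) and returns its first output blob.
//
// Minimal usage sketch (illustrative only; "model.onnx" and 'blob' are placeholders):
//   Net net = readNetFromONNX("model.onnx");
//   net.setInput(blob);
//   Mat prob = net.forward();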
4048 Mat Net::forward(const String& outputName)
4049 {
4050 CV_TRACE_FUNCTION();
4051 CV_Assert(!empty());
4052
4053 String layerName = outputName;
4054
4055 if (layerName.empty())
4056 {
4057 std::vector<String> layerNames = getLayerNames();
4058 CV_Assert(!layerNames.empty());
4059 layerName = layerNames.back();
4060 }
4061
4062 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
4063 impl->setUpNet(pins);
4064 impl->forwardToLayer(impl->getLayerData(layerName));
4065
4066 return impl->getBlob(layerName);
4067 }
4068
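// Asynchronous forward pass: sets up the net, runs it with isAsync enabled and returns
// an AsyncArray future. Supported only for the Inference Engine backends and requires
// a build with C++11 enabled.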
4069 AsyncArray Net::forwardAsync(const String& outputName)
4070 {
4071 CV_TRACE_FUNCTION();
4072 CV_Assert(!empty());
4073
4074 #ifdef CV_CXX11
4075 String layerName = outputName;
4076
4077 if (layerName.empty())
4078 {
4079 std::vector<String> layerNames = getLayerNames();
4080 CV_Assert(!layerNames.empty());
4081 layerName = layerNames.back();
4082 }
4083
4084 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
4085 impl->setUpNet(pins);
4086
4087 if (!(impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
4088 CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backends only");
4089
4090 impl->isAsync = true;
4091 impl->forwardToLayer(impl->getLayerData(layerName));
4092 impl->isAsync = false;
4093
4094 return impl->getBlobAsync(layerName);
4095 #else
4096 CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11");
4097 #endif // CV_CXX11
4098 }
4099
4100 void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
4101 {
4102 CV_TRACE_FUNCTION();
4103 CV_Assert(!empty());
4104
4105 String layerName = outputName;
4106
4107 if (layerName.empty())
4108 {
4109 std::vector<String> layerNames = getLayerNames();
4110 CV_Assert(!layerNames.empty());
4111 layerName = layerNames.back();
4112 }
4113
4114 std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
4115 impl->setUpNet(pins);
4116 impl->forwardToLayer(impl->getLayerData(layerName));
4117
4118 LayerPin pin = impl->getPinByAlias(layerName);
4119 LayerData &ld = impl->layers[pin.lid];
4120
4121 if (outputBlobs.isUMat())
4122 {
4123 impl->getBlob(layerName).copyTo(outputBlobs);
4124 }
4125 else if (outputBlobs.isMat())
4126 {
4127 outputBlobs.assign(impl->getBlob(layerName));
4128 }
4129 else if (outputBlobs.isMatVector())
4130 {
4131 if (impl->preferableTarget != DNN_TARGET_CPU)
4132 {
4133 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
4134 {
4135 CV_Assert(!ld.outputBlobsWrappers[i].empty());
4136 ld.outputBlobsWrappers[i]->copyToHost();
4137 }
4138 }
4139 if (ld.outputBlobs[0].depth() == CV_32F)
4140 {
4141 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
4142 outputvec = ld.outputBlobs;
4143 } else {
4144 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
4145 outputvec.resize(ld.outputBlobs.size());
4146 for (int i = 0; i < outputvec.size(); i++)
4147 convertFp16(ld.outputBlobs[i], outputvec[i]);
4148 }
4149 }
4150 else if (outputBlobs.isUMatVector())
4151 {
4152 std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
4153
4154 #ifdef HAVE_OPENCL
4155 if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
4156 IS_DNN_OPENCL_TARGET(impl->preferableTarget))
4157 {
4158 if (impl->preferableTarget == DNN_TARGET_OPENCL)
4159 outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
4160 else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
4161 {
4162 std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
4163 outputvec.resize(out_vec.size());
4164 for (int i = 0; i < out_vec.size(); i++)
4165 convertFp16(out_vec[i], outputvec[i]);
4166 }
4167 }
4168 else
4169 #endif
4170 {
4171 outputvec.resize(ld.outputBlobs.size());
4172 for (int i = 0; i < outputvec.size(); ++i)
4173 ld.outputBlobs[i].copyTo(outputvec[i]);
4174 }
4175 }
4176 }
4177
4178 void Net::forward(OutputArrayOfArrays outputBlobs,
4179 const std::vector<String>& outBlobNames)
4180 {
4181 CV_TRACE_FUNCTION();
4182
4183 std::vector<LayerPin> pins;
4184 for (int i = 0; i < outBlobNames.size(); i++)
4185 {
4186 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
4187 }
4188
4189 impl->setUpNet(pins);
4190
4191 LayerPin out = impl->getLatestLayerPin(pins);
4192
4193 impl->forwardToLayer(impl->getLayerData(out.lid));
4194
4195 std::vector<Mat> matvec;
4196 for (int i = 0; i < pins.size(); i++)
4197 {
4198 matvec.push_back(impl->getBlob(pins[i]));
4199 }
4200
4201 std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
4202 outputvec = matvec;
4203 }
4204
4205 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
4206 const std::vector<String>& outBlobNames)
4207 {
4208 CV_TRACE_FUNCTION();
4209
4210 std::vector<LayerPin> pins;
4211 for (int i = 0; i < outBlobNames.size(); i++)
4212 {
4213 pins.push_back(impl->getPinByAlias(outBlobNames[i]));
4214 }
4215
4216 impl->setUpNet(pins);
4217
4218 LayerPin out = impl->getLatestLayerPin(pins);
4219
4220 impl->forwardToLayer(impl->getLayerData(out.lid));
4221
4222 outputBlobs.resize(outBlobNames.size());
4223 for (int i = 0; i < outBlobNames.size(); i++)
4224 {
4225 std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
4226 outputBlobs[i].resize(lp.size());
4227 for (int j = 0; j < lp.size(); j++)
4228 {
4229 outputBlobs[i][j] = impl->getBlob(lp[j]);
4230 }
4231 }
4232 }
4233
4234 void Net::setPreferableBackend(int backendId)
4235 {
4236 CV_TRACE_FUNCTION();
4237 CV_TRACE_ARG(backendId);
4238
4239 #ifdef HAVE_INF_ENGINE
4240 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
4241 backendId = getInferenceEngineBackendTypeParam();
4242 #endif
4243
4244 if( impl->preferableBackend != backendId )
4245 {
4246 impl->preferableBackend = backendId;
4247 impl->netWasAllocated = false;
4248 impl->clear();
4249 }
4250 }
4251
4252 void Net::setPreferableTarget(int targetId)
4253 {
4254 CV_TRACE_FUNCTION();
4255 CV_TRACE_ARG(targetId);
4256
4257 if( impl->preferableTarget != targetId )
4258 {
4259 impl->preferableTarget = targetId;
4260 if (IS_DNN_OPENCL_TARGET(targetId))
4261 {
4262 #ifndef HAVE_OPENCL
4263 #ifdef HAVE_INF_ENGINE
4264 if (impl->preferableBackend == DNN_BACKEND_OPENCV)
4265 #else
4266 if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
4267 impl->preferableBackend == DNN_BACKEND_OPENCV)
4268 #endif // HAVE_INF_ENGINE
4269 impl->preferableTarget = DNN_TARGET_CPU;
4270 #else
4271 bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
4272 if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
4273 impl->preferableTarget = DNN_TARGET_OPENCL;
4274 #endif
4275 }
4276 impl->netWasAllocated = false;
4277 impl->clear();
4278 }
4279 }
4280
4281 void Net::setInputsNames(const std::vector<String> &inputBlobNames)
4282 {
4283 CV_TRACE_FUNCTION();
4284
4285 impl->netInputLayer->setNames(inputBlobNames);
4286 }
4287
4288 void Net::setInputShape(const String &inputName, const MatShape& shape)
4289 {
4290 CV_TRACE_FUNCTION();
4291
4292 impl->netInputLayer->setInputShape(inputName, shape);
4293 }
4294
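// Binds 'blob' to the network input with the given name. The blob is copied into the
// input layer, the scale factor and mean are stored there for later normalization, and
// the net is marked for reallocation if the blob shape changed.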
4295 void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
4296 {
4297 CV_TRACE_FUNCTION();
4298 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
4299
4300 LayerPin pin;
4301 pin.lid = 0;
4302 pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);
4303
4304 if (!pin.valid())
4305 CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");
4306
4307     Mat blob_ = blob.getMat(); // can't use InputArray directly due to MatExpr stuff
4308 MatShape blobShape = shape(blob_);
4309
4310 if (pin.lid == 0)
4311 {
4312 CV_Assert(!impl->netInputLayer.empty());
4313 const DataLayer& netInputLayer = *impl->netInputLayer.get();
4314 if (!netInputLayer.shapes.empty())
4315 {
4316 CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), "");
4317 const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid];
4318 if (!inputShapeLimitation.empty())
4319 {
4320 CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), "");
4321 #if 0 // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0
4322 const size_t dims = inputShapeLimitation.size();
4323 for (size_t dim = 0; dim < dims; dim++)
4324 {
4325 if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1)
4326 continue; // don't limit batch
4327 CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], "");
4328 }
4329 #endif
4330 }
4331 }
4332 }
4333
4334 LayerData &ld = impl->layers[pin.lid];
4335 const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
4336 ld.outputBlobs.resize(numInputs);
4337 ld.outputBlobsWrappers.resize(numInputs);
4338 impl->netInputLayer->inputsData.resize(numInputs);
4339 impl->netInputLayer->scaleFactors.resize(numInputs);
4340 impl->netInputLayer->means.resize(numInputs);
4341
4342 MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
4343 bool oldShape = prevShape == blobShape;
4344
4345 blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
4346 if (!oldShape) {
4347 ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
4348 if (impl->hasDynamicShapes)
4349 {
4350 impl->updateLayersShapes();
4351 }
4352 }
4353
4354 if (!ld.outputBlobsWrappers[pin.oid].empty())
4355 {
4356 ld.outputBlobsWrappers[pin.oid]->setHostDirty();
4357 }
4358 impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
4359 impl->netInputLayer->means[pin.oid] = mean;
4360 impl->netWasAllocated = impl->netWasAllocated && oldShape;
4361 }
4362
4363 Mat Net::getParam(LayerId layer, int numParam)
4364 {
4365 LayerData &ld = impl->getLayerData(layer);
4366 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
4367 CV_Assert(numParam < (int)layerBlobs.size());
4368 return layerBlobs[numParam];
4369 }
4370
4371 void Net::setParam(LayerId layer, int numParam, const Mat &blob)
4372 {
4373 LayerData &ld = impl->getLayerData(layer);
4374
4375 std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
4376 CV_Assert(numParam < (int)layerBlobs.size());
4377 //we don't make strong checks, use this function carefully
4378 layerBlobs[numParam] = blob;
4379 }
4380
4381 int Net::getLayerId(const String &layer)
4382 {
4383 return impl->getLayerId(layer);
4384 }
4385
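// Formats a size-like layer parameter (e.g. "kernel_size") as "name (HxW): a x b" for dump().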
4386 static
4387 string dumpLayerParameterSize(const string& name, const LayerParams& lp)
4388 {
4389 std::ostringstream out(name, std::ios::ate);
4390 DictValue param = lp.get(name);
4391 switch (param.size())
4392 {
4393 case 1: out << " : "; break;
4394 case 2: out << " (HxW): "; break;
4395 case 3: out << " (DxHxW): "; break;
4396 default:
4397 CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size()));
4398 out << ": ";
4399 }
4400 for (size_t i = 0; i < param.size(); i++)
4401 {
4402 if (i > 0)
4403 out << " x ";
4404 out << param.get<int>(i);
4405 }
4406 return out.str();
4407 }
4408
4409 String Net::dump()
4410 {
4411 CV_Assert(!empty());
4412
4413 bool hasInput = !impl->netInputLayer->inputsData.empty();
4414
4415 if (hasInput)
4416 {
4417 if (!impl->netWasAllocated)
4418 impl->setUpNet();
4419 }
4420
4421 return impl->dump();
4422 }
4423
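// Builds a graphviz (DOT) description of the network. Layers skipped or fused for the
// current backend are collapsed into record-shaped cluster nodes; node colors encode the
// execution target of each layer.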
4424 string Net::Impl::dump()
4425 {
4426 bool hasInput = !netInputLayer->inputsData.empty();
4427
4428 std::ostringstream out;
4429 const std::map<int, LayerData>& map = layers;
4430
4431 Backend prefBackend = (Backend)preferableBackend;
4432 std::vector<std::vector<int> > skippedLayers;
4433 std::vector<int> skipId;
4434 std::vector<int> allLayers(map.size(), -1);
4435 int idPrev = -1;
4436 Ptr<BackendNode> prevNode;
4437 for (std::map<int, LayerData>::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit)
4438 {
4439 std::map<int, Ptr<BackendNode> >::const_iterator itBackend = rit->second.backendNodes.find(prefBackend);
4440 if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() ||
4441 itBackend->second.empty())
4442 {
4443 if (rit->second.skip)
4444 skipId.push_back(rit->first);
4445 else if (!skipId.empty())
4446 {
4447 if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty())
4448 skipId.push_back(rit->first);
4449 else if (idPrev != -1)
4450 skipId.push_back(idPrev);
4451
4452 std::sort(skipId.begin(), skipId.end());
4453 for (int i = 0; i < skipId.size(); i++) {
4454 allLayers[skipId[i]] = skippedLayers.size();
4455 }
4456 skippedLayers.push_back(skipId);
4457 skipId.clear();
4458 }
4459 }
4460 else
4461 {
4462 if (itBackend->second == prevNode)
4463 skipId.push_back(idPrev);
4464 else if (!skipId.empty())
4465 {
4466 skipId.push_back(idPrev);
4467 std::sort(skipId.begin(), skipId.end());
4468 for (int i = 0; i < skipId.size(); i++) {
4469 allLayers[skipId[i]] = skippedLayers.size();
4470 }
4471 skippedLayers.push_back(skipId);
4472 skipId.clear();
4473 }
4474 idPrev = rit->first;
4475 prevNode = itBackend->second;
4476 }
4477 }
4478 std::vector<string> colors = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff"};
4479 string backend;
4480 switch (prefBackend)
4481 {
4482 case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
4483 case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
4484 case DNN_BACKEND_INFERENCE_ENGINE: // fallthru
4485 case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: backend = "DLIE/"; break;
4486 case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "NGRAPH/"; break;
4487 case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
4488 case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break;
4489 case DNN_BACKEND_CUDA: backend = "CUDA/"; break;
4490 // don't use default:
4491 }
4492 out << "digraph G {\n";
4493 // Add nodes
4494 for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
4495 {
4496 const LayerData& ld = it->second;
4497 string name = ld.params.name;
4498 std::vector<int> clusterIds(1, it->first);
4499 if (allLayers[it->first] == -1 && !name.empty())
4500 {
4501 out << "\t\"" << name << "\" [label=\"";
4502 }
4503 else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0])
4504 {
4505 continue;
4506 }
4507 else // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0]
4508 {
4509 int cluster = allLayers[it->first];
4510 out << "\t\"" << "cluster_" << cluster << "\" [label=\"{";
4511 clusterIds = skippedLayers[allLayers[it->first]]; // vertices in current cluster
4512 }
4513 for (int i = 0; i < clusterIds.size(); i++)
4514 {
4515 CV_DbgAssert(map.find(clusterIds[i]) != map.end());
4516 const LayerParams& lp = map.find(clusterIds[i])->second.params;
4517 if (!lp.name.empty()) {
4518 if (i > 0) {
4519 out << " | ";
4520 }
4521 out << lp.name << "\\n" << lp.type << "\\n"; // align center
4522 if (lp.has("kernel_size"))
4523 {
4524 string kernel = dumpLayerParameterSize("kernel_size", lp);
4525 out << kernel;
4526 out << "\\l"; // align left
4527 } else if (lp.has("kernel_h") && lp.has("kernel_w")) {
4528 DictValue h = lp.get("kernel_h");
4529 DictValue w = lp.get("kernel_w");
4530 out << "kernel (HxW): " << h << " x " << w;
4531 out << "\\l"; // align left
4532 }
4533 if (lp.has("stride")) {
4534 string stride = dumpLayerParameterSize("stride", lp);
4535 out << stride;
4536 out << "\\l"; // align left
4537 } else if (lp.has("stride_h") && lp.has("stride_w")) {
4538 DictValue h = lp.get("stride_h");
4539 DictValue w = lp.get("stride_w");
4540 out << "stride (HxW): " << h << " x " << w;
4541 out << "\\l"; // align left
4542 }
4543 if (lp.has("dilation")) {
4544 string dilation = dumpLayerParameterSize("dilation", lp);
4545 out << dilation;
4546 out << "\\l"; // align left
4547 } else if (lp.has("dilation_h") && lp.has("dilation_w")) {
4548 DictValue h = lp.get("dilation_h");
4549 DictValue w = lp.get("dilation_w");
4550 out << "dilation (HxW): " << h << " x " << w;
4551 out << "\\l"; // align left
4552 }
4553 if (lp.has("pad")) {
4554 DictValue pad = lp.get("pad");
4555 out << "pad ";
4556 switch (pad.size())
4557 {
4558 case 1: out << ": " << pad; break;
4559 case 2:
4560 out << "(HxW): (" << pad.get<int>(0) << " x " << pad.get<int>(1) << ")";
4561 break;
4562 case 4:
4563 out << "(HxW): (" << pad.get<int>(0) << ", " << pad.get<int>(2)
4564 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(3) << ")";
4565 break;
4566 case 6:
4567 out << "(DxHxW): (" << pad.get<int>(0) << ", " << pad.get<int>(3)
4568 << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(4)
4569 << ") x (" << pad.get<int>(2) << ", " << pad.get<int>(5) << ")";
4570 break;
4571 default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size()));
4572 }
4573 out << "\\l"; // align left
4574 } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) {
4575 DictValue l = lp.get("pad_l");
4576 DictValue t = lp.get("pad_t");
4577 DictValue r = lp.get("pad_r");
4578 DictValue b = lp.get("pad_b");
4579 out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")";
4580 out << "\\l"; // align left
4581 }
4582 else if (lp.has("pooled_w") || lp.has("pooled_h")) {
4583 DictValue h = lp.get("pooled_h");
4584 DictValue w = lp.get("pooled_w");
4585 out << "pad pooled (HxW): " << h << " x " << w;
4586 out << "\\l"; // align left
4587 }
4588 if (lp.has("pool")) {
4589 out << "pool: " << lp.get("pool");
4590 out << "\\l"; // align left
4591 }
4592 if (lp.has("global_pooling")) {
4593 out << "global_pooling: " << lp.get("global_pooling");
4594 out << "\\l"; // align left
4595 }
4596 if (lp.has("group")) {
4597 out << "group: " << lp.get("group");
4598 out << "\\l"; // align left
4599 }
4600 }
4601 }
4602 if (!ld.outputBlobs.empty())
4603 {
4604 out << "output: " << ld.outputBlobs[0].size;
4605 out << "\\l"; // align left
4606 }
4607
4608 Ptr<BackendNode> layerBackend;
4609 std::map<int, Ptr<BackendNode> >::const_iterator ibn = ld.backendNodes.find(prefBackend);
4610 if (ibn != ld.backendNodes.end())
4611 layerBackend = ibn->second;
4612 out << (!layerBackend.empty() ? backend : "OCV/");
4613 int colorId = 0;
4614 const Target target = ld.layerInstance.empty()
4615 ? DNN_TARGET_CPU
4616 : (Target)(ld.layerInstance->preferableTarget); // TODO fix preferableTarget type
4617 switch (target)
4618 {
4619 case DNN_TARGET_CPU: out << "CPU"; colorId = layerBackend.empty() ? 0 : 5; break;
4620 case DNN_TARGET_OPENCL: out << "OCL"; colorId = 1; break;
4621 case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16"; colorId = 2; break;
4622 case DNN_TARGET_MYRIAD: out << "MYRIAD"; colorId = 3; break;
4623 case DNN_TARGET_HDDL: out << "HDDL"; colorId = 8; break;
4624 case DNN_TARGET_VULKAN: out << "VULKAN"; colorId = 7; break;
4625 case DNN_TARGET_FPGA: out << "FPGA"; colorId = 4; break;
4626 case DNN_TARGET_CUDA: out << "CUDA"; colorId = 5; break;
4627 case DNN_TARGET_CUDA_FP16: out << "CUDA_FP16"; colorId = 6; break;
4628 // don't use default:
4629 }
4630 CV_Assert(colorId < colors.size());
4631 out << "\\n"; // align center
4632 out << ((clusterIds.size() == 1)? "\" " : " }\" ");
4633 out << "fillcolor=\"" << colors[colorId] << "\" ";
4634 out << "style=filled ";
4635 out << "shape=" << ((clusterIds.size() == 1)? "box" : "record") << "]\n";
4636 }
4637 out << '\n';
4638 // Add edges
4639 int inputsSize = hasInput ? netInputLayer->outNames.size() : 0;
4640 for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
4641 {
4642 const LayerData& ld = it->second;
4643 if (allLayers[it->first] == -1) // node
4644 {
4645 for (int i = 0; i < ld.consumers.size(); i++)
4646 {
4647 int outId = ld.consumers[i].lid;
4648 if (it == map.begin() && inputsSize > 1)
4649 out << "\t\"" << ld.name << "_" << i << "\"" << " -> ";
4650 else
4651 out << "\t\"" << ld.name << "\"" << " -> ";
4652 if (allLayers[outId] == -1) // node
4653 {
4654 CV_DbgAssert(map.find(outId) != map.end());
4655 out << "\"" << map.find(outId)->second.name << "\"\n";
4656 }
4657 else // cluster
4658 {
4659 out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4660 }
4661 }
4662 }
4663 else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster
4664 {
4665 for (int i = 0; i < ld.consumers.size(); i++)
4666 {
4667 int outId = ld.consumers[i].lid;
4668 if (allLayers[outId] == -1) // node
4669 {
4670 CV_DbgAssert(map.find(outId) != map.end());
4671 out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4672 out << "\"" << map.find(outId)->second.name << "\"\n";
4673 }
4674 else if (allLayers[outId] != allLayers[it->first]) { // another cluster
4675 out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
4676 out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
4677 }
4678 }
4679 }
4680 }
4681 out << "}\n";
4682 return out.str();
4683 }
4684
4685 void Net::dumpToFile(const String& path) {
4686 std::ofstream file(path.c_str());
4687 file << dump();
4688 file.close();
4689 }
4690
4691 Ptr<Layer> Net::getLayer(LayerId layerId)
4692 {
4693 LayerData &ld = impl->getLayerData(layerId);
4694 return ld.getLayerInstance();
4695 }
4696
4697 std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
4698 {
4699 LayerData &ld = impl->getLayerData(layerId);
4700
4701 std::vector<Ptr<Layer> > inputLayers;
4702 inputLayers.reserve(ld.inputBlobsId.size());
4703 for (int i = 0; i < ld.inputBlobsId.size(); ++i) {
4704 inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid));
4705 }
4706 return inputLayers;
4707 }
4708
4709 std::vector<String> Net::getLayerNames() const
4710 {
4711 CV_TRACE_FUNCTION();
4712
4713 std::vector<String> res;
4714 res.reserve(impl->layers.size());
4715
4716 Impl::MapIdToLayerData::iterator it;
4717 for (it = impl->layers.begin(); it != impl->layers.end(); it++)
4718 {
4719 if (it->second.id) //skip Data layer
4720 res.push_back(it->second.name);
4721 }
4722
4723 return res;
4724 }
4725
4726 bool Net::empty() const
4727 {
4728 return impl->layers.size() <= 1; //first layer is default Data layer
4729 }
4730
4731 std::vector<int> Net::getUnconnectedOutLayers() const
4732 {
4733 std::vector<int> layersIds;
4734
4735 Impl::MapIdToLayerData::iterator it;
4736 for (it = impl->layers.begin(); it != impl->layers.end(); it++)
4737 {
4738 int lid = it->first;
4739 LayerData &ld = it->second;
4740
4741 if (ld.requiredOutputs.size() == 0)
4742 layersIds.push_back(lid);
4743 }
4744
4745 return layersIds;
4746 }
4747
4748 std::vector<String> Net::getUnconnectedOutLayersNames() const
4749 {
4750 std::vector<int> ids = getUnconnectedOutLayers();
4751 const size_t n = ids.size();
4752 std::vector<String> names(n);
4753 for (size_t i = 0; i < n; ++i)
4754 {
4755 names[i] = impl->layers[ids[i]].name;
4756 }
4757 return names;
4758 }
4759
4760 void Net::getLayersShapes(const ShapesVec& netInputShapes,
4761 std::vector<int>& layersIds,
4762 std::vector<ShapesVec>& inLayersShapes,
4763 std::vector<ShapesVec>& outLayersShapes) const
4764 {
4765 layersIds.clear();
4766 inLayersShapes.clear();
4767 outLayersShapes.clear();
4768
4769 Impl::LayersShapesMap inOutShapes;
4770 impl->getLayersShapes(netInputShapes, inOutShapes);
4771
4772 for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
4773 it != inOutShapes.end(); it++)
4774 {
4775 layersIds.push_back(it->first);
4776 inLayersShapes.push_back(it->second.in);
4777 outLayersShapes.push_back(it->second.out);
4778 }
4779 }
4780
4781 void Net::getLayersShapes(const MatShape& netInputShape,
4782 std::vector<int>& layerIds,
4783 std::vector<ShapesVec>& inLayersShapes,
4784 std::vector<ShapesVec>& outLayersShapes) const
4785 {
4786 getLayersShapes(ShapesVec(1, netInputShape),
4787 layerIds, inLayersShapes, outLayersShapes);
4788 }
4789
4790 void Net::getLayerShapes(const MatShape& netInputShape,
4791 const int layerId,
4792 ShapesVec& inLayerShapes,
4793 ShapesVec& outLayerShapes) const
4794 {
4795 getLayerShapes(ShapesVec(1, netInputShape),
4796 layerId, inLayerShapes, outLayerShapes);
4797
4798 }
4799
4800 void Net::getLayerShapes(const ShapesVec& netInputShapes,
4801 const int layerId,
4802 ShapesVec& inLayerShapes,
4803 ShapesVec& outLayerShapes) const
4804 {
4805 LayerShapes shapes;
4806 impl->getLayerShapes(netInputShapes, layerId, shapes);
4807 inLayerShapes = shapes.in;
4808 outLayerShapes = shapes.out;
4809 }
4810
4811 int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
4812 {
4813 CV_TRACE_FUNCTION();
4814
4815 int64 flops = 0;
4816 std::vector<int> ids;
4817 std::vector<std::vector<MatShape> > inShapes, outShapes;
4818 getLayersShapes(netInputShapes, ids, inShapes, outShapes);
4819 CV_Assert(inShapes.size() == outShapes.size());
4820 CV_Assert(inShapes.size() == ids.size());
4821
4822 for(int i = 0; i < ids.size(); i++)
4823 {
4824 flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i],
4825 outShapes[i]);
4826 }
4827
4828 return flops;
4829 }
4830
4831 int64 Net::getFLOPS(const MatShape& netInputShape) const
4832 {
4833 return getFLOPS(std::vector<MatShape>(1, netInputShape));
4834 }
4835
4836 int64 Net::getFLOPS(const int layerId,
4837 const std::vector<MatShape>& netInputShapes) const
4838 {
4839 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
4840 CV_Assert(layer != impl->layers.end());
4841
4842 LayerShapes shapes;
4843 impl->getLayerShapes(netInputShapes, layerId, shapes);
4844
4845 return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
4846 }
4847
4848 int64 Net::getFLOPS(const int layerId,
4849 const MatShape& netInputShape) const
4850 {
4851 return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
4852 }
4853
4854 void Net::getLayerTypes(std::vector<String>& layersTypes) const
4855 {
4856 layersTypes.clear();
4857
4858 std::map<String, int> layers;
4859 for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
4860 it != impl->layers.end(); it++)
4861 {
4862 if (layers.find(it->second.type) == layers.end())
4863 layers[it->second.type] = 0;
4864 layers[it->second.type]++;
4865 }
4866
4867 for (std::map<String, int>::iterator it = layers.begin();
4868 it != layers.end(); it++)
4869 {
4870 layersTypes.push_back(it->first);
4871 }
4872 }
4873
4874 int Net::getLayersCount(const String& layerType) const
4875 {
4876 int count = 0;
4877 for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
4878 it != impl->layers.end(); it++)
4879 {
4880 if (it->second.type == layerType)
4881 count++;
4882 }
4883 return count;
4884 }
4885
4886 void Net::getMemoryConsumption(const int layerId,
4887 const std::vector<MatShape>& netInputShapes,
4888 size_t& weights, size_t& blobs) const
4889 {
4890 CV_TRACE_FUNCTION();
4891
4892 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
4893 CV_Assert(layer != impl->layers.end());
4894
4895 weights = blobs = 0;
4896
4897 for(int i = 0; i < layer->second.params.blobs.size(); i++)
4898 {
4899 const Mat& weightsBlob = layer->second.params.blobs[i];
4900 weights += weightsBlob.total()*weightsBlob.elemSize();
4901 }
4902
4903 ShapesVec inLayerShapes, outLayerShapes;
4904 getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
4905 for(int i = 0; i < outLayerShapes.size(); i++)
4906 {
4907 blobs += total(outLayerShapes[i]) * sizeof(float);
4908 }
4909 }
4910
4911 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
4912 size_t& weights, size_t& blobs) const
4913 {
4914 CV_TRACE_FUNCTION();
4915
4916 std::vector<int> layerIds;
4917 std::vector<size_t> w, b;
4918 getMemoryConsumption(netInputShapes, layerIds, w, b);
4919
4920 weights = blobs = 0;
4921 for(int i = 0; i < layerIds.size(); i++)
4922 {
4923 weights += w[i];
4924 blobs += b[i];
4925 }
4926 }
4927
4928 void Net::getMemoryConsumption(const int layerId,
4929 const MatShape& netInputShape,
4930 size_t& weights, size_t& blobs) const
4931 {
4932 getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
4933 weights, blobs);
4934 }
4935
4936 void Net::getMemoryConsumption(const MatShape& netInputShape,
4937 size_t& weights, size_t& blobs) const
4938 {
4939 getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
4940 weights, blobs);
4941 }
4942
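// Estimates per-layer memory consumption: 'weights' sums the learned parameter blobs,
// 'blobs' sums the output blobs assuming 32-bit float storage.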
4943 void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
4944 std::vector<int>& layerIds, std::vector<size_t>& weights,
4945 std::vector<size_t>& blobs) const
4946 {
4947 CV_TRACE_FUNCTION();
4948
4949 layerIds.clear();
4950 weights.clear();
4951 blobs.clear();
4952
4953 std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
4954
4955 getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
4956
4957 for(int i = 0; i < layerIds.size(); i++)
4958 {
4959         size_t w = 0, b = 0;
4960 Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
4961 CV_Assert(layer != impl->layers.end());
4962
4963 for(int j = 0; j < layer->second.params.blobs.size(); j++)
4964 {
4965 const Mat& weightsBlob = layer->second.params.blobs[j];
4966 w += weightsBlob.total()*weightsBlob.elemSize();
4967 }
4968
4969 for(int j = 0; j < outLayerShapes[i].size(); j++)
4970 {
4971 b += total(outLayerShapes[i][j]) * sizeof(float);
4972 }
4973
4974 weights.push_back(w);
4975 blobs.push_back(b);
4976 }
4977 }
4978
4979 void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
4980 std::vector<size_t>& weights, std::vector<size_t>& blobs) const
4981 {
4982 getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
4983 weights, blobs);
4984 }
4985
4986 void Net::enableFusion(bool fusion)
4987 {
4988 if( impl->fusion != fusion )
4989 {
4990 impl->fusion = fusion;
4991 impl->netWasAllocated = false;
4992 impl->clear();
4993 }
4994 }
4995
4996 void Net::setHalideScheduler(const String& scheduler)
4997 {
4998 CV_TRACE_FUNCTION();
4999 CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());
5000
5001 impl->halideConfigFile = scheduler;
5002 }
5003
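// Returns the per-layer timings (in ticks) collected during the last forward pass,
// skipping the input layer; the return value is their sum.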
5004 int64 Net::getPerfProfile(std::vector<double>& timings)
5005 {
5006 timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
5007 int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
5008 return total;
5009 }
5010
5011 //////////////////////////////////////////////////////////////////////////
5012
5013 Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }
5014
5015 Layer::Layer(const LayerParams &params)
5016 : blobs(params.blobs), name(params.name), type(params.type)
5017 {
5018 preferableTarget = DNN_TARGET_CPU;
5019 }
5020
5021 void Layer::setParamsFrom(const LayerParams &params)
5022 {
5023 blobs = params.blobs;
5024 name = params.name;
5025 type = params.type;
5026 }
5027
5028 int Layer::inputNameToIndex(String)
5029 {
5030 return -1;
5031 }
5032
5033 int Layer::outputNameToIndex(const String&)
5034 {
5035 return 0;
5036 }
5037
5038 bool Layer::supportBackend(int backendId)
5039 {
5040 return backendId == DNN_BACKEND_OPENCV;
5041 }
5042
5043 Ptr<BackendNode> Layer::initCUDA(
5044 void*,
5045 const std::vector<Ptr<BackendWrapper>>&,
5046 const std::vector<Ptr<BackendWrapper>>&)
5047 {
5048 CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type +
5049 " layers is not defined.");
5050 return Ptr<BackendNode>();
5051 }
5052
5053 Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> > &)
5054 {
5055 CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type +
5056 " layers is not defined.");
5057 return Ptr<BackendNode>();
5058 }
5059
5060 Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
5061 {
5062 CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
5063 " layers is not defined.");
5064 return Ptr<BackendNode>();
5065 }
5066
5067 Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
5068 {
5069 CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
5070 " layers is not defined.");
5071 return Ptr<BackendNode>();
5072 }
5073
5074 Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper> > & inputs, const std::vector<Ptr<BackendNode> >& nodes)
5075 {
5076 CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
5077 " layers is not defined.");
5078 return Ptr<BackendNode>();
5079 }
5080
applyHalideScheduler(Ptr<BackendNode> & node,const std::vector<Mat * > & inputs,const std::vector<Mat> & outputs,int targetId) const5081 void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
5082 const std::vector<Mat> &outputs, int targetId) const
5083 {
5084 #ifdef HAVE_HALIDE
5085 CV_TRACE_FUNCTION();
5086
5087 Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
5088 xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
5089 Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();
5090
5091 int outW, outH, outC, outN;
5092 getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
5093
5094 if (targetId == DNN_TARGET_CPU)
5095 {
5096 if (outW == 1 && outH == 1)
5097 {
5098 if (outC + outN == 1)
5099 return;
5100
5101 if (outC > 8)
5102 top.split(c, co, ci, 8)
5103 .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
5104 .parallel(tile)
5105 .vectorize(ci, 8);
5106 else
5107 top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
5108 .parallel(tile);
5109 }
5110 else
5111 {
5112 if (outH > 2)
5113 {
5114 top.reorder(x, c, y)
5115 .split(y, yo, yi, 2)
5116 .fuse(yo, n, tile)
5117 .parallel(tile)
5118 .unroll(yi)
5119 .vectorize(x, outW >= 16 ? 16 : outW);
5120 }
5121 }
5122 }
5123 else if (targetId == DNN_TARGET_OPENCL)
5124 {
5125 if (outW == 1 && outH == 1)
5126 {
5127 int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
5128 top.split(c, co, ci, c_split)
5129 .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
5130 .gpu_blocks(tile)
5131 .gpu_threads(ci);
5132 }
5133 else
5134 {
5135 int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
5136 int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
5137 // Supported vectorization widths: 2, 3, 4, 8, 16
5138 int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
5139 top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
5140 .split(c, co, ci, c_split)
5141 .gpu_blocks(xo, yo, co)
5142 .gpu_threads(xi, yi)
5143 .reorder(xi, yi, ci, xo, yo, co)
5144 .vectorize(ci);
5145 }
5146 }
5147 else
5148 CV_Error(Error::StsNotImplemented, "Unknown target identifier");
5149 #endif // HAVE_HALIDE
5150 }

Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
{
    return Ptr<BackendNode>();
}

bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
    scale = Mat();
    shift = Mat();
}

void Layer::unsetAttached()
{
    setActivation(Ptr<ActivationLayer>());
}

template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
    pv.resize(v.size());
    for (size_t i = 0; i < v.size(); i++)
        pv[i] = const_cast<T*>(&v[i]);
}

void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
{
    CV_TRACE_FUNCTION();
    this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
}

void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
{
    CV_UNUSED(input); CV_UNUSED(output);
}

void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    std::vector<Mat*> inputsp;
    vecToPVec(inputs, inputsp);
    this->finalize(inputsp, outputs);
}

std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat> outputs;
    this->finalize(inputs, outputs);
    return outputs;
}

void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{
    // We kept this method for compatibility. DNN calls it now only to support users' implementations.
}

void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}

void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        std::vector<UMat> orig_inputs;
        std::vector<UMat> orig_outputs;
        std::vector<UMat> orig_internals;

        inputs_arr.getUMatVector(orig_inputs);
        outputs_arr.getUMatVector(orig_outputs);
        internals_arr.getUMatVector(orig_internals);

        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
            convertFp16(orig_inputs[i], inputs[i]);

        outputs.resize(orig_outputs.size());
        for (size_t i = 0; i < orig_outputs.size(); i++)
            outputs[i].create(shape(orig_outputs[i]), CV_32F);

        internals.resize(orig_internals.size());
        for (size_t i = 0; i < orig_internals.size(); i++)
            internals[i].create(shape(orig_internals[i]), CV_32F);

        forward(inputs, outputs, internals);

        for (size_t i = 0; i < outputs.size(); i++)
            convertFp16(outputs[i], orig_outputs[i]);

        // sync results back
        outputs_arr.assign(orig_outputs);
        internals_arr.assign(orig_internals);
        return;
    }
    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    std::vector<Mat*> inputs(inpvec.size());
    for (int i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}

void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
    CV_TRACE_FUNCTION();

    this->finalize(inputs, outputs);
    this->forward(inputs, outputs, internals);
}

Layer::~Layer() {}

bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
                            const int requiredOutputs,
                            std::vector<MatShape> &outputs,
                            std::vector<MatShape> &internals) const
{
    CV_Assert(inputs.size());
    outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
    return false;
}

bool Layer::updateMemoryShapes(const std::vector<MatShape> &inputs)
{
    return true;
}
//////////////////////////////////////////////////////////////////////////

static Mutex& getLayerFactoryMutex()
{
    static Mutex* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getInitializationMutex());
        if (instance == NULL)
            instance = new Mutex();
    }
    return *instance;
}

static LayerFactory_Impl& getLayerFactoryImpl_()
{
    static LayerFactory_Impl impl;
    return impl;
}

LayerFactory_Impl& getLayerFactoryImpl()
{
    static LayerFactory_Impl* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getLayerFactoryMutex());
        if (instance == NULL)
        {
            instance = &getLayerFactoryImpl_();
            initializeLayerFactory();
        }
    }
    return *instance;
}

void LayerFactory::registerLayer(const String &type, Constructor constructor)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);

    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.back() == constructor)
            CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered");
        it->second.push_back(constructor);
    }
    getLayerFactoryImpl().insert(std::make_pair(type, std::vector<Constructor>(1, constructor)));
}
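
// Usage sketch (illustrative only, not part of the library): registering a user-defined
// layer type so that importers and Net::addLayer() can instantiate it by name. The class
// "MyReluLayer" and the type string "MyRelu" are hypothetical; the default
// getMemoryShapes() above already propagates the input shape, which suits an
// element-wise layer like this one.
//
//   class MyReluLayer CV_FINAL : public cv::dnn::Layer
//   {
//   public:
//       MyReluLayer(const cv::dnn::LayerParams& params) { setParamsFrom(params); }
//
//       static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params)
//       {
//           return cv::makePtr<MyReluLayer>(params);
//       }
//
//       void forward(cv::InputArrayOfArrays inputs_arr, cv::OutputArrayOfArrays outputs_arr,
//                    cv::OutputArrayOfArrays) CV_OVERRIDE
//       {
//           std::vector<cv::Mat> inputs, outputs;
//           inputs_arr.getMatVector(inputs);
//           outputs_arr.getMatVector(outputs);
//           cv::max(inputs[0], 0.0, outputs[0]);   // element-wise ReLU
//       }
//   };
//
//   cv::dnn::LayerFactory::registerLayer("MyRelu", MyReluLayer::create);
//
// The CV_DNN_REGISTER_LAYER_CLASS convenience macro (from opencv2/dnn/layer.details.hpp)
// can be used instead of calling registerLayer() directly.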

void LayerFactory::unregisterLayer(const String &type)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());

    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);
    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.size() > 1)
            it->second.pop_back();
        else
            getLayerFactoryImpl().erase(it);
    }
}

Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type);

    if (it != getLayerFactoryImpl().end())
    {
        CV_Assert(!it->second.empty());
        return it->second.back()(params);
    }
    else
    {
        return Ptr<Layer>();  // NULL
    }
}

BackendNode::BackendNode(int backendId) : backendId(backendId) {}

BackendNode::~BackendNode() {}

BackendWrapper::BackendWrapper(int backendId, int targetId)
    : backendId(backendId), targetId(targetId) {}

BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::~BackendWrapper() {}

Net readNet(const String& _model, const String& _config, const String& _framework)
{
    String framework = toLowerCase(_framework);
    String model = _model;
    String config = _config;
    const std::string modelExt = model.substr(model.rfind('.') + 1);
    const std::string configExt = config.substr(config.rfind('.') + 1);
    if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
                                modelExt == "prototxt" || configExt == "prototxt")
    {
        if (modelExt == "prototxt" || configExt == "caffemodel")
            std::swap(model, config);
        return readNetFromCaffe(config, model);
    }
    if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
                                     modelExt == "pbtxt" || configExt == "pbtxt")
    {
        if (modelExt == "pbtxt" || configExt == "pb")
            std::swap(model, config);
        return readNetFromTensorflow(model, config);
    }
    if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
                                configExt == "t7" || configExt == "net")
    {
        return readNetFromTorch(model.empty() ? config : model);
    }
    if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
                                  modelExt == "cfg" || configExt == "cfg")
    {
        if (modelExt == "cfg" || configExt == "weights")
            std::swap(model, config);
        return readNetFromDarknet(config, model);
    }
    if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
                               modelExt == "xml" || configExt == "xml")
    {
        if (modelExt == "xml" || configExt == "bin")
            std::swap(model, config);
        return readNetFromModelOptimizer(config, model);
    }
    if (framework == "onnx" || modelExt == "onnx")
    {
        return readNetFromONNX(model);
    }
    CV_Error(Error::StsError, "Cannot determine an origin framework of files: " +
                              model + (config.empty() ? "" : ", " + config));
}
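
// Usage sketch (illustrative only): the same entry point accepts models from several
// frameworks and infers the origin from the file extensions; model and config may be
// passed in either order, since they are swapped based on the extension. All file
// names below are hypothetical.
//
//   Net caffeNet   = readNet("deploy.prototxt", "weights.caffemodel");
//   Net tfNet      = readNet("frozen_graph.pb", "graph.pbtxt");
//   Net darknetNet = readNet("yolo.cfg", "yolo.weights");
//   Net onnxNet    = readNet("model.onnx");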

Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
            const std::vector<uchar>& bufferConfig)
{
    String framework = toLowerCase(_framework);
    if (framework == "caffe")
        return readNetFromCaffe(bufferConfig, bufferModel);
    else if (framework == "tensorflow")
        return readNetFromTensorflow(bufferModel, bufferConfig);
    else if (framework == "darknet")
        return readNetFromDarknet(bufferConfig, bufferModel);
    else if (framework == "torch")
        CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
    else if (framework == "dldt")
        return readNetFromModelOptimizer(bufferConfig, bufferModel);
    CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
}

Net readNetFromModelOptimizer(const String &xml, const String &bin)
{
    return Net::readFromModelOptimizer(xml, bin);
}

Net readNetFromModelOptimizer(const std::vector<uchar>& bufferCfg, const std::vector<uchar>& bufferModel)
{
    return Net::readFromModelOptimizer(bufferCfg, bufferModel);
}

Net readNetFromModelOptimizer(
        const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
        const uchar* bufferWeightsPtr, size_t bufferWeightsSize
)
{
    return Net::readFromModelOptimizer(
        bufferModelConfigPtr, bufferModelConfigSize,
        bufferWeightsPtr, bufferWeightsSize
    );
}
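
// Usage sketch (illustrative only): loading an OpenVINO IR pair (.xml topology plus
// .bin weights) that has already been read into memory. The file names and the helper
// readFileToBuffer() are hypothetical placeholders for application code.
//
//   std::vector<uchar> xmlData = readFileToBuffer("model.xml");
//   std::vector<uchar> binData = readFileToBuffer("model.bin");
//   Net net = readNetFromModelOptimizer(xmlData, binData);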

CV__DNN_INLINE_NS_END
}} // namespace