1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved. 14 // Third party copyrights are property of their respective owners. 15 // 16 // Redistribution and use in source and binary forms, with or without modification, 17 // are permitted provided that the following conditions are met: 18 // 19 // * Redistribution's of source code must retain the above copyright notice, 20 // this list of conditions and the following disclaimer. 21 // 22 // * Redistribution's in binary form must reproduce the above copyright notice, 23 // this list of conditions and the following disclaimer in the documentation 24 // and/or other materials provided with the distribution. 25 // 26 // * The name of the copyright holders may not be used to endorse or promote products 27 // derived from this software without specific prior written permission. 28 // 29 // This software is provided by the copyright holders and contributors "as is" and 30 // any express or implied warranties, including, but not limited to, the implied 31 // warranties of merchantability and fitness for a particular purpose are disclaimed. 
32 // In no event shall the Intel Corporation or contributors be liable for any direct, 33 // indirect, incidental, special, exemplary, or consequential damages 34 // (including, but not limited to, procurement of substitute goods or services; 35 // loss of use, data, or profits; or business interruption) however caused 36 // and on any theory of liability, whether in contract, strict liability, 37 // or tort (including negligence or otherwise) arising in any way out of 38 // the use of this software, even if advised of the possibility of such damage. 39 // 40 //M*/ 41 42 #ifndef OPENCV_DNN_DNN_HPP 43 #define OPENCV_DNN_DNN_HPP 44 45 #include <vector> 46 #include <opencv2/core.hpp> 47 #include "opencv2/core/async.hpp" 48 49 #include "../dnn/version.hpp" 50 51 #include <opencv2/dnn/dict.hpp> 52 53 namespace cv { 54 namespace dnn { 55 CV__DNN_INLINE_NS_BEGIN 56 //! @addtogroup dnn 57 //! @{ 58 59 typedef std::vector<int> MatShape; 60 61 /** 62 * @brief Enum of computation backends supported by layers. 63 * @see Net::setPreferableBackend 64 */ 65 enum Backend 66 { 67 //! DNN_BACKEND_DEFAULT equals to DNN_BACKEND_INFERENCE_ENGINE if 68 //! OpenCV is built with Intel's Inference Engine library or 69 //! DNN_BACKEND_OPENCV otherwise. 70 DNN_BACKEND_DEFAULT = 0, 71 DNN_BACKEND_HALIDE, 72 DNN_BACKEND_INFERENCE_ENGINE, //!< Intel's Inference Engine computational backend 73 //!< @sa setInferenceEngineBackendType 74 DNN_BACKEND_OPENCV, 75 DNN_BACKEND_VKCOM, 76 DNN_BACKEND_CUDA, 77 #ifdef __OPENCV_BUILD 78 DNN_BACKEND_INFERENCE_ENGINE_NGRAPH = 1000000, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType() 79 DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType() 80 #endif 81 }; 82 83 /** 84 * @brief Enum of target devices for computations. 
85 * @see Net::setPreferableTarget 86 */ 87 enum Target 88 { 89 DNN_TARGET_CPU = 0, 90 DNN_TARGET_OPENCL, 91 DNN_TARGET_OPENCL_FP16, 92 DNN_TARGET_MYRIAD, 93 DNN_TARGET_VULKAN, 94 DNN_TARGET_FPGA, //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin. 95 DNN_TARGET_CUDA, 96 DNN_TARGET_CUDA_FP16, 97 DNN_TARGET_HDDL 98 }; 99 100 CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends(); 101 CV_EXPORTS_W std::vector<Target> getAvailableTargets(dnn::Backend be); 102 103 /** 104 * @brief Enables detailed logging of the DNN model loading with CV DNN API. 105 * @param[in] isDiagnosticsMode Indicates whether diagnostic mode should be set. 106 * 107 * Diagnostic mode provides detailed logging of the model loading stage to explore 108 * potential problems (ex.: not implemented layer type). 109 * 110 * @note In diagnostic mode series of assertions will be skipped, it can lead to the 111 * expected application crash. 112 */ 113 CV_EXPORTS void enableModelDiagnostics(bool isDiagnosticsMode); 114 115 /** @brief This class provides all data needed to initialize layer. 116 * 117 * It includes dictionary with scalar params (which can be read by using Dict interface), 118 * blob params #blobs and optional meta information: #name and #type of layer instance. 119 */ 120 class CV_EXPORTS LayerParams : public Dict 121 { 122 public: 123 //TODO: Add ability to name blob params 124 std::vector<Mat> blobs; //!< List of learned parameters stored as blobs. 125 126 String name; //!< Name of the layer instance (optional, can be used internal purposes). 127 String type; //!< Type name which was used for creating layer by layer factory (optional). 128 }; 129 130 /** 131 * @brief Derivatives of this class encapsulates functions of certain backends. 132 */ 133 class BackendNode 134 { 135 public: 136 BackendNode(int backendId); 137 138 virtual ~BackendNode(); //!< Virtual destructor to make polymorphism. 139 140 int backendId; //!< Backend identifier. 
141 }; 142 143 /** 144 * @brief Derivatives of this class wraps cv::Mat for different backends and targets. 145 */ 146 class BackendWrapper 147 { 148 public: 149 BackendWrapper(int backendId, int targetId); 150 151 /** 152 * @brief Wrap cv::Mat for specific backend and target. 153 * @param[in] targetId Target identifier. 154 * @param[in] m cv::Mat for wrapping. 155 * 156 * Make CPU->GPU data transfer if it's require for the target. 157 */ 158 BackendWrapper(int targetId, const cv::Mat& m); 159 160 /** 161 * @brief Make wrapper for reused cv::Mat. 162 * @param[in] base Wrapper of cv::Mat that will be reused. 163 * @param[in] shape Specific shape. 164 * 165 * Initialize wrapper from another one. It'll wrap the same host CPU 166 * memory and mustn't allocate memory on device(i.e. GPU). It might 167 * has different shape. Use in case of CPU memory reusing for reuse 168 * associated memory on device too. 169 */ 170 BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape); 171 172 virtual ~BackendWrapper(); //!< Virtual destructor to make polymorphism. 173 174 /** 175 * @brief Transfer data to CPU host memory. 176 */ 177 virtual void copyToHost() = 0; 178 179 /** 180 * @brief Indicate that an actual data is on CPU. 181 */ 182 virtual void setHostDirty() = 0; 183 184 int backendId; //!< Backend identifier. 185 int targetId; //!< Target identifier. 186 }; 187 188 class CV_EXPORTS ActivationLayer; 189 190 /** @brief This interface class allows to build new Layers - are building blocks of networks. 191 * 192 * Each class, derived from Layer, must implement allocate() methods to declare own outputs and forward() to compute outputs. 193 * Also before using the new layer into networks you must register your layer by using one of @ref dnnLayerFactory "LayerFactory" macros. 194 */ 195 class CV_EXPORTS_W Layer : public Algorithm 196 { 197 public: 198 199 //! List of learned parameters must be stored here to allow read them by using Net::getParam(). 
200 CV_PROP_RW std::vector<Mat> blobs; 201 202 /** @brief Computes and sets internal parameters according to inputs, outputs and blobs. 203 * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead 204 * @param[in] input vector of already allocated input blobs 205 * @param[out] output vector of already allocated output blobs 206 * 207 * If this method is called after network has allocated all memory for input and output blobs 208 * and before inferencing. 209 */ 210 CV_DEPRECATED_EXTERNAL 211 virtual void finalize(const std::vector<Mat*> &input, std::vector<Mat> &output); 212 213 /** @brief Computes and sets internal parameters according to inputs, outputs and blobs. 214 * @param[in] inputs vector of already allocated input blobs 215 * @param[out] outputs vector of already allocated output blobs 216 * 217 * If this method is called after network has allocated all memory for input and output blobs 218 * and before inferencing. 219 */ 220 CV_WRAP virtual void finalize(InputArrayOfArrays inputs, OutputArrayOfArrays outputs); 221 222 /** @brief Given the @p input blobs, computes the output @p blobs. 223 * @deprecated Use Layer::forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) instead 224 * @param[in] input the input blobs. 225 * @param[out] output allocated output blobs, which will store results of the computation. 226 * @param[out] internals allocated internal blobs 227 */ 228 CV_DEPRECATED_EXTERNAL 229 virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals); 230 231 /** @brief Given the @p input blobs, computes the output @p blobs. 232 * @param[in] inputs the input blobs. 233 * @param[out] outputs allocated output blobs, which will store results of the computation. 
234 * @param[out] internals allocated internal blobs 235 */ 236 virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals); 237 238 /** @brief Given the @p input blobs, computes the output @p blobs. 239 * @param[in] inputs the input blobs. 240 * @param[out] outputs allocated output blobs, which will store results of the computation. 241 * @param[out] internals allocated internal blobs 242 */ 243 void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals); 244 245 /** @brief 246 * @overload 247 * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead 248 */ 249 CV_DEPRECATED_EXTERNAL 250 void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs); 251 252 /** @brief 253 * @overload 254 * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead 255 */ 256 CV_DEPRECATED std::vector<Mat> finalize(const std::vector<Mat> &inputs); 257 258 /** @brief Allocates layer and computes output. 259 * @deprecated This method will be removed in the future release. 260 */ 261 CV_DEPRECATED CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs, 262 CV_IN_OUT std::vector<Mat> &internals); 263 264 /** @brief Returns index of input blob into the input array. 265 * @param inputName label of input blob 266 * 267 * Each layer input and output can be labeled to easily identify them using "%<layer_name%>[.output_name]" notation. 268 * This method maps label of input blob to its index into input vector. 269 */ 270 virtual int inputNameToIndex(String inputName); 271 /** @brief Returns index of output blob in output array. 272 * @see inputNameToIndex() 273 */ 274 CV_WRAP virtual int outputNameToIndex(const String& outputName); 275 276 /** 277 * @brief Ask layer if it support specific backend for doing computations. 278 * @param[in] backendId computation backend identifier. 
279 * @see Backend 280 */ 281 virtual bool supportBackend(int backendId); 282 283 /** 284 * @brief Returns Halide backend node. 285 * @param[in] inputs Input Halide buffers. 286 * @see BackendNode, BackendWrapper 287 * 288 * Input buffers should be exactly the same that will be used in forward invocations. 289 * Despite we can use Halide::ImageParam based on input shape only, 290 * it helps prevent some memory management issues (if something wrong, 291 * Halide tests will be failed). 292 */ 293 virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs); 294 295 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs); 296 297 virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs, const std::vector<Ptr<BackendNode> >& nodes); 298 299 virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs); 300 301 /** 302 * @brief Returns a CUDA backend node 303 * 304 * @param context void pointer to CSLContext object 305 * @param inputs layer inputs 306 * @param outputs layer outputs 307 */ 308 virtual Ptr<BackendNode> initCUDA( 309 void *context, 310 const std::vector<Ptr<BackendWrapper>>& inputs, 311 const std::vector<Ptr<BackendWrapper>>& outputs 312 ); 313 314 /** 315 * @brief Automatic Halide scheduling based on layer hyper-parameters. 316 * @param[in] node Backend node with Halide functions. 317 * @param[in] inputs Blobs that will be used in forward invocations. 318 * @param[in] outputs Blobs that will be used in forward invocations. 319 * @param[in] targetId Target identifier 320 * @see BackendNode, Target 321 * 322 * Layer don't use own Halide::Func members because we can have applied 323 * layers fusing. In this way the fused function should be scheduled. 
324 */ 325 virtual void applyHalideScheduler(Ptr<BackendNode>& node, 326 const std::vector<Mat*> &inputs, 327 const std::vector<Mat> &outputs, 328 int targetId) const; 329 330 /** 331 * @brief Implement layers fusing. 332 * @param[in] node Backend node of bottom layer. 333 * @see BackendNode 334 * 335 * Actual for graph-based backends. If layer attached successfully, 336 * returns non-empty cv::Ptr to node of the same backend. 337 * Fuse only over the last function. 338 */ 339 virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node); 340 341 /** 342 * @brief Tries to attach to the layer the subsequent activation layer, i.e. do the layer fusion in a partial case. 343 * @param[in] layer The subsequent activation layer. 344 * 345 * Returns true if the activation layer has been attached successfully. 346 */ 347 virtual bool setActivation(const Ptr<ActivationLayer>& layer); 348 349 /** 350 * @brief Try to fuse current layer with a next one 351 * @param[in] top Next layer to be fused. 352 * @returns True if fusion was performed. 353 */ 354 virtual bool tryFuse(Ptr<Layer>& top); 355 356 /** 357 * @brief Returns parameters of layers with channel-wise multiplication and addition. 358 * @param[out] scale Channel-wise multipliers. Total number of values should 359 * be equal to number of channels. 360 * @param[out] shift Channel-wise offsets. Total number of values should 361 * be equal to number of channels. 362 * 363 * Some layers can fuse their transformations with further layers. 364 * In example, convolution + batch normalization. This way base layer 365 * use weights from layer after it. Fused layer is skipped. 366 * By default, @p scale and @p shift are empty that means layer has no 367 * element-wise multiplications or additions. 368 */ 369 virtual void getScaleShift(Mat& scale, Mat& shift) const; 370 371 /** 372 * @brief "Deattaches" all the layers, attached to particular layer. 
373 */ 374 virtual void unsetAttached(); 375 376 virtual bool getMemoryShapes(const std::vector<MatShape> &inputs, 377 const int requiredOutputs, 378 std::vector<MatShape> &outputs, 379 std::vector<MatShape> &internals) const; 380 getFLOPS(const std::vector<MatShape> & inputs,const std::vector<MatShape> & outputs) const381 virtual int64 getFLOPS(const std::vector<MatShape> &inputs, 382 const std::vector<MatShape> &outputs) const {CV_UNUSED(inputs); CV_UNUSED(outputs); return 0;} 383 384 virtual bool updateMemoryShapes(const std::vector<MatShape> &inputs); 385 386 CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes. 387 CV_PROP String type; //!< Type name which was used for creating layer by layer factory. 388 CV_PROP int preferableTarget; //!< prefer target for layer forwarding 389 390 Layer(); 391 explicit Layer(const LayerParams ¶ms); //!< Initializes only #name, #type and #blobs fields. 392 void setParamsFrom(const LayerParams ¶ms); //!< Initializes only #name, #type and #blobs fields. 393 virtual ~Layer(); 394 }; 395 396 /** @brief This class allows to create and manipulate comprehensive artificial neural networks. 397 * 398 * Neural network is presented as directed acyclic graph (DAG), where vertices are Layer instances, 399 * and edges specify relationships between layers inputs and outputs. 400 * 401 * Each network layer has unique integer id and unique string name inside its network. 402 * LayerId can store either layer name or layer id. 403 * 404 * This class supports reference counting of its instances, i. e. copies point to the same instance. 405 */ 406 class CV_EXPORTS_W_SIMPLE Net 407 { 408 public: 409 410 CV_WRAP Net(); //!< Default constructor. 411 CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore. 412 413 /** @brief Create a network from Intel's Model Optimizer intermediate representation (IR). 
414 * @param[in] xml XML configuration file with network's topology. 415 * @param[in] bin Binary file with trained weights. 416 * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine 417 * backend. 418 */ 419 CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin); 420 421 /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). 422 * @param[in] bufferModelConfig buffer with model's configuration. 423 * @param[in] bufferWeights buffer with model's trained weights. 424 * @returns Net object. 425 */ 426 CV_WRAP static 427 Net readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights); 428 429 /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). 430 * @param[in] bufferModelConfigPtr buffer pointer of model's configuration. 431 * @param[in] bufferModelConfigSize buffer size of model's configuration. 432 * @param[in] bufferWeightsPtr buffer pointer of model's trained weights. 433 * @param[in] bufferWeightsSize buffer size of model's trained weights. 434 * @returns Net object. 435 */ 436 static 437 Net readFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, 438 const uchar* bufferWeightsPtr, size_t bufferWeightsSize); 439 440 /** Returns true if there are no layers in the network. */ 441 CV_WRAP bool empty() const; 442 443 /** @brief Dump net to String 444 * @returns String with structure, hyperparameters, backend, target and fusion 445 * Call method after setInput(). To see correct backend, target and fusion run after forward(). 
446 */ 447 CV_WRAP String dump(); 448 /** @brief Dump net structure, hyperparameters, backend, target and fusion to dot file 449 * @param path path to output file with .dot extension 450 * @see dump() 451 */ 452 CV_WRAP void dumpToFile(const String& path); 453 /** @brief Adds new layer to the net. 454 * @param name unique name of the adding layer. 455 * @param type typename of the adding layer (type must be registered in LayerRegister). 456 * @param params parameters which will be used to initialize the creating layer. 457 * @returns unique identifier of created layer, or -1 if a failure will happen. 458 */ 459 int addLayer(const String &name, const String &type, LayerParams ¶ms); 460 /** @brief Adds new layer and connects its first input to the first output of previously added layer. 461 * @see addLayer() 462 */ 463 int addLayerToPrev(const String &name, const String &type, LayerParams ¶ms); 464 465 /** @brief Converts string name of the layer to the integer identifier. 466 * @returns id of the layer, or -1 if the layer wasn't found. 467 */ 468 CV_WRAP int getLayerId(const String &layer); 469 470 CV_WRAP std::vector<String> getLayerNames() const; 471 472 /** @brief Container for strings and integers. */ 473 typedef DictValue LayerId; 474 475 /** @brief Returns pointer to layer with specified id or name which the network use. */ 476 CV_WRAP Ptr<Layer> getLayer(LayerId layerId); 477 478 /** @brief Returns pointers to input layers of specific layer. */ 479 std::vector<Ptr<Layer> > getLayerInputs(LayerId layerId); // FIXIT: CV_WRAP 480 481 /** @brief Connects output of the first layer to input of the second layer. 482 * @param outPin descriptor of the first layer output. 483 * @param inpPin descriptor of the second layer input. 484 * 485 * Descriptors have the following template <DFN><layer_name>[.input_number]</DFN>: 486 * - the first part of the template <DFN>layer_name</DFN> is string name of the added layer. 
487 * If this part is empty then the network input pseudo layer will be used; 488 * - the second optional part of the template <DFN>input_number</DFN> 489 * is either number of the layer input, either label one. 490 * If this part is omitted then the first layer input will be used. 491 * 492 * @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex() 493 */ 494 CV_WRAP void connect(String outPin, String inpPin); 495 496 /** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer. 497 * @param outLayerId identifier of the first layer 498 * @param outNum number of the first layer output 499 * @param inpLayerId identifier of the second layer 500 * @param inpNum number of the second layer input 501 */ 502 void connect(int outLayerId, int outNum, int inpLayerId, int inpNum); 503 504 /** @brief Sets outputs names of the network input pseudo layer. 505 * 506 * Each net always has special own the network input pseudo layer with id=0. 507 * This layer stores the user blobs only and don't make any computations. 508 * In fact, this layer provides the only way to pass user data into the network. 509 * As any other layer, this layer can label its outputs and this function provides an easy way to do this. 510 */ 511 CV_WRAP void setInputsNames(const std::vector<String> &inputBlobNames); 512 513 /** @brief Specify shape of network input. 514 */ 515 CV_WRAP void setInputShape(const String &inputName, const MatShape& shape); 516 517 /** @brief Runs forward pass to compute output of layer with name @p outputName. 518 * @param outputName name for layer which output is needed to get 519 * @return blob for first output of specified layer. 520 * @details By default runs forward pass for the whole network. 521 */ 522 CV_WRAP Mat forward(const String& outputName = String()); 523 524 /** @brief Runs forward pass to compute output of layer with name @p outputName. 
525 * @param outputName name for layer which output is needed to get 526 * @details By default runs forward pass for the whole network. 527 * 528 * This is an asynchronous version of forward(const String&). 529 * dnn::DNN_BACKEND_INFERENCE_ENGINE backend is required. 530 */ 531 CV_WRAP AsyncArray forwardAsync(const String& outputName = String()); 532 533 /** @brief Runs forward pass to compute output of layer with name @p outputName. 534 * @param outputBlobs contains all output blobs for specified layer. 535 * @param outputName name for layer which output is needed to get 536 * @details If @p outputName is empty, runs forward pass for the whole network. 537 */ 538 CV_WRAP void forward(OutputArrayOfArrays outputBlobs, const String& outputName = String()); 539 540 /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames. 541 * @param outputBlobs contains blobs for first outputs of specified layers. 542 * @param outBlobNames names for layers which outputs are needed to get 543 */ 544 CV_WRAP void forward(OutputArrayOfArrays outputBlobs, 545 const std::vector<String>& outBlobNames); 546 547 /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames. 548 * @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames. 549 * @param outBlobNames names for layers which outputs are needed to get 550 */ 551 CV_WRAP_AS(forwardAndRetrieve) void forward(CV_OUT std::vector<std::vector<Mat> >& outputBlobs, 552 const std::vector<String>& outBlobNames); 553 554 /** 555 * @brief Compile Halide layers. 556 * @param[in] scheduler Path to YAML file with scheduling directives. 557 * @see setPreferableBackend 558 * 559 * Schedule layers that support Halide backend. Then compile them for 560 * specific target. For layers that not represented in scheduling file 561 * or if no manual scheduling used at all, automatic scheduling will be applied. 
562 */ 563 CV_WRAP void setHalideScheduler(const String& scheduler); 564 565 /** 566 * @brief Ask network to use specific computation backend where it supported. 567 * @param[in] backendId backend identifier. 568 * @see Backend 569 * 570 * If OpenCV is compiled with Intel's Inference Engine library, DNN_BACKEND_DEFAULT 571 * means DNN_BACKEND_INFERENCE_ENGINE. Otherwise it equals to DNN_BACKEND_OPENCV. 572 */ 573 CV_WRAP void setPreferableBackend(int backendId); 574 575 /** 576 * @brief Ask network to make computations on specific target device. 577 * @param[in] targetId target identifier. 578 * @see Target 579 * 580 * List of supported combinations backend / target: 581 * | | DNN_BACKEND_OPENCV | DNN_BACKEND_INFERENCE_ENGINE | DNN_BACKEND_HALIDE | DNN_BACKEND_CUDA | 582 * |------------------------|--------------------|------------------------------|--------------------|-------------------| 583 * | DNN_TARGET_CPU | + | + | + | | 584 * | DNN_TARGET_OPENCL | + | + | + | | 585 * | DNN_TARGET_OPENCL_FP16 | + | + | | | 586 * | DNN_TARGET_MYRIAD | | + | | | 587 * | DNN_TARGET_FPGA | | + | | | 588 * | DNN_TARGET_CUDA | | | | + | 589 * | DNN_TARGET_CUDA_FP16 | | | | + | 590 * | DNN_TARGET_HDDL | | + | | | 591 */ 592 CV_WRAP void setPreferableTarget(int targetId); 593 594 /** @brief Sets the new input value for the network 595 * @param blob A new blob. Should have CV_32F or CV_8U depth. 596 * @param name A name of input layer. 597 * @param scalefactor An optional normalization scale. 598 * @param mean An optional mean subtraction values. 599 * @see connect(String, String) to know format of the descriptor. 600 * 601 * If scale or mean values are specified, a final input blob is computed 602 * as: 603 * \f[input(n,c,h,w) = scalefactor \times (blob(n,c,h,w) - mean_c)\f] 604 */ 605 CV_WRAP void setInput(InputArray blob, const String& name = "", 606 double scalefactor = 1.0, const Scalar& mean = Scalar()); 607 608 /** @brief Sets the new value for the learned param of the layer. 
609 * @param layer name or id of the layer. 610 * @param numParam index of the layer parameter in the Layer::blobs array. 611 * @param blob the new value. 612 * @see Layer::blobs 613 * @note If shape of the new blob differs from the previous shape, 614 * then the following forward pass may fail. 615 */ 616 CV_WRAP void setParam(LayerId layer, int numParam, const Mat &blob); 617 618 /** @brief Returns parameter blob of the layer. 619 * @param layer name or id of the layer. 620 * @param numParam index of the layer parameter in the Layer::blobs array. 621 * @see Layer::blobs 622 */ 623 CV_WRAP Mat getParam(LayerId layer, int numParam = 0); 624 625 /** @brief Returns indexes of layers with unconnected outputs. 626 */ 627 CV_WRAP std::vector<int> getUnconnectedOutLayers() const; 628 629 /** @brief Returns names of layers with unconnected outputs. 630 */ 631 CV_WRAP std::vector<String> getUnconnectedOutLayersNames() const; 632 633 /** @brief Returns input and output shapes for all layers in loaded model; 634 * preliminary inferencing isn't necessary. 635 * @param netInputShapes shapes for all input blobs in net input layer. 636 * @param layersIds output parameter for layer IDs. 
637 * @param inLayersShapes output parameter for input layers shapes; 638 * order is the same as in layersIds 639 * @param outLayersShapes output parameter for output layers shapes; 640 * order is the same as in layersIds 641 */ 642 CV_WRAP void getLayersShapes(const std::vector<MatShape>& netInputShapes, 643 CV_OUT std::vector<int>& layersIds, 644 CV_OUT std::vector<std::vector<MatShape> >& inLayersShapes, 645 CV_OUT std::vector<std::vector<MatShape> >& outLayersShapes) const; 646 647 /** @overload */ 648 CV_WRAP void getLayersShapes(const MatShape& netInputShape, 649 CV_OUT std::vector<int>& layersIds, 650 CV_OUT std::vector<std::vector<MatShape> >& inLayersShapes, 651 CV_OUT std::vector<std::vector<MatShape> >& outLayersShapes) const; 652 653 /** @brief Returns input and output shapes for layer with specified 654 * id in loaded model; preliminary inferencing isn't necessary. 655 * @param netInputShape shape input blob in net input layer. 656 * @param layerId id for layer. 657 * @param inLayerShapes output parameter for input layers shapes; 658 * order is the same as in layersIds 659 * @param outLayerShapes output parameter for output layers shapes; 660 * order is the same as in layersIds 661 */ 662 void getLayerShapes(const MatShape& netInputShape, 663 const int layerId, 664 CV_OUT std::vector<MatShape>& inLayerShapes, 665 CV_OUT std::vector<MatShape>& outLayerShapes) const; // FIXIT: CV_WRAP 666 667 /** @overload */ 668 void getLayerShapes(const std::vector<MatShape>& netInputShapes, 669 const int layerId, 670 CV_OUT std::vector<MatShape>& inLayerShapes, 671 CV_OUT std::vector<MatShape>& outLayerShapes) const; // FIXIT: CV_WRAP 672 673 /** @brief Computes FLOP for whole loaded model with specified input shapes. 674 * @param netInputShapes vector of shapes for all net inputs. 675 * @returns computed FLOP. 
676 */ 677 CV_WRAP int64 getFLOPS(const std::vector<MatShape>& netInputShapes) const; 678 /** @overload */ 679 CV_WRAP int64 getFLOPS(const MatShape& netInputShape) const; 680 /** @overload */ 681 CV_WRAP int64 getFLOPS(const int layerId, 682 const std::vector<MatShape>& netInputShapes) const; 683 /** @overload */ 684 CV_WRAP int64 getFLOPS(const int layerId, 685 const MatShape& netInputShape) const; 686 687 /** @brief Returns list of types for layer used in model. 688 * @param layersTypes output parameter for returning types. 689 */ 690 CV_WRAP void getLayerTypes(CV_OUT std::vector<String>& layersTypes) const; 691 692 /** @brief Returns count of layers of specified type. 693 * @param layerType type. 694 * @returns count of layers 695 */ 696 CV_WRAP int getLayersCount(const String& layerType) const; 697 698 /** @brief Computes bytes number which are required to store 699 * all weights and intermediate blobs for model. 700 * @param netInputShapes vector of shapes for all net inputs. 701 * @param weights output parameter to store resulting bytes for weights. 702 * @param blobs output parameter to store resulting bytes for intermediate blobs. 703 */ 704 void getMemoryConsumption(const std::vector<MatShape>& netInputShapes, 705 CV_OUT size_t& weights, CV_OUT size_t& blobs) const; // FIXIT: CV_WRAP 706 /** @overload */ 707 CV_WRAP void getMemoryConsumption(const MatShape& netInputShape, 708 CV_OUT size_t& weights, CV_OUT size_t& blobs) const; 709 /** @overload */ 710 CV_WRAP void getMemoryConsumption(const int layerId, 711 const std::vector<MatShape>& netInputShapes, 712 CV_OUT size_t& weights, CV_OUT size_t& blobs) const; 713 /** @overload */ 714 CV_WRAP void getMemoryConsumption(const int layerId, 715 const MatShape& netInputShape, 716 CV_OUT size_t& weights, CV_OUT size_t& blobs) const; 717 718 /** @brief Computes bytes number which are required to store 719 * all weights and intermediate blobs for each layer. 
720 * @param netInputShapes vector of shapes for all net inputs. 721 * @param layerIds output vector to save layer IDs. 722 * @param weights output parameter to store resulting bytes for weights. 723 * @param blobs output parameter to store resulting bytes for intermediate blobs. 724 */ 725 void getMemoryConsumption(const std::vector<MatShape>& netInputShapes, 726 CV_OUT std::vector<int>& layerIds, 727 CV_OUT std::vector<size_t>& weights, 728 CV_OUT std::vector<size_t>& blobs) const; // FIXIT: CV_WRAP 729 /** @overload */ 730 void getMemoryConsumption(const MatShape& netInputShape, 731 CV_OUT std::vector<int>& layerIds, 732 CV_OUT std::vector<size_t>& weights, 733 CV_OUT std::vector<size_t>& blobs) const; // FIXIT: CV_WRAP 734 735 /** @brief Enables or disables layer fusion in the network. 736 * @param fusion true to enable the fusion, false to disable. The fusion is enabled by default. 737 */ 738 CV_WRAP void enableFusion(bool fusion); 739 740 /** @brief Returns overall time for inference and timings (in ticks) for layers. 741 * 742 * Indexes in returned vector correspond to layers ids. Some layers can be fused with others, 743 * in this case zero ticks count will be return for that skipped layers. Supported by DNN_BACKEND_OPENCV on DNN_TARGET_CPU only. 744 * 745 * @param[out] timings vector for tick timings for all layers. 746 * @return overall ticks for model inference. 747 */ 748 CV_WRAP int64 getPerfProfile(CV_OUT std::vector<double>& timings); 749 750 private: 751 struct Impl; 752 Ptr<Impl> impl; 753 }; 754 755 /** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files. 756 * @param cfgFile path to the .cfg file with text description of the network architecture. 757 * @param darknetModel path to the .weights file with learned network. 758 * @returns Network object that ready to do forward, throw an exception in failure cases. 759 * @returns Net object. 
 */
CV_EXPORTS_W Net readNetFromDarknet(const String &cfgFile, const String &darknetModel = String());

/** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
 *  @param bufferCfg   A buffer containing the content of a .cfg file with text description of the network architecture.
 *  @param bufferModel A buffer containing the content of a .weights file with learned network.
 *  @returns Net object.
 */
CV_EXPORTS_W Net readNetFromDarknet(const std::vector<uchar>& bufferCfg,
                                    const std::vector<uchar>& bufferModel = std::vector<uchar>());

/** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
 *  @param bufferCfg   A buffer containing the content of a .cfg file with text description of the network architecture.
 *  @param lenCfg      Number of bytes to read from bufferCfg
 *  @param bufferModel A buffer containing the content of a .weights file with learned network.
 *  @param lenModel    Number of bytes to read from bufferModel
 *  @returns Net object.
 */
CV_EXPORTS Net readNetFromDarknet(const char *bufferCfg, size_t lenCfg,
                                  const char *bufferModel = NULL, size_t lenModel = 0);

/** @brief Reads a network model stored in <a href="http://caffe.berkeleyvision.org">Caffe</a> framework's format.
 *  @param prototxt   path to the .prototxt file with text description of the network architecture.
 *  @param caffeModel path to the .caffemodel file with learned network.
 *  @returns Net object.
 */
CV_EXPORTS_W Net readNetFromCaffe(const String &prototxt, const String &caffeModel = String());

/** @brief Reads a network model stored in Caffe model in memory.
 *  @param bufferProto buffer containing the content of the .prototxt file
 *  @param bufferModel buffer containing the content of the .caffemodel file
 *  @returns Net object.
 */
CV_EXPORTS_W Net readNetFromCaffe(const std::vector<uchar>& bufferProto,
                                  const std::vector<uchar>& bufferModel = std::vector<uchar>());

/** @brief Reads a network model stored in Caffe model in memory.
 *  @details This is an overloaded member function, provided for convenience.
 *  It differs from the above function only in what argument(s) it accepts.
 *  @param bufferProto buffer containing the content of the .prototxt file
 *  @param lenProto    length of bufferProto
 *  @param bufferModel buffer containing the content of the .caffemodel file
 *  @param lenModel    length of bufferModel
 *  @returns Net object.
 */
CV_EXPORTS Net readNetFromCaffe(const char *bufferProto, size_t lenProto,
                                const char *bufferModel = NULL, size_t lenModel = 0);

/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
 *  @param model  path to the .pb file with binary protobuf description of the network architecture
 *  @param config path to the .pbtxt file that contains text graph definition in protobuf format.
 *                Resulting Net object is built by text graph using weights from a binary one, which
 *                lets us make it more flexible.
 *  @returns Net object.
 */
CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String());

/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
 *  @param bufferModel  buffer containing the content of the pb file
 *  @param bufferConfig buffer containing the content of the pbtxt file
 *  @returns Net object.
 */
CV_EXPORTS_W Net readNetFromTensorflow(const std::vector<uchar>& bufferModel,
                                       const std::vector<uchar>& bufferConfig = std::vector<uchar>());

/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
 *  @details This is an overloaded member function, provided for convenience.
 *  It differs from the above function only in what argument(s) it accepts.
 *  @param bufferModel  buffer containing the content of the pb file
 *  @param lenModel     length of bufferModel
 *  @param bufferConfig buffer containing the content of the pbtxt file
 *  @param lenConfig    length of bufferConfig
 */
CV_EXPORTS Net readNetFromTensorflow(const char *bufferModel, size_t lenModel,
                                     const char *bufferConfig = NULL, size_t lenConfig = 0);

/**
 *  @brief Reads a network model stored in <a href="http://torch.ch">Torch7</a> framework's format.
 *  @param model    path to the file, dumped from Torch by using torch.save() function.
 *  @param isBinary specifies whether the network was serialized in ascii mode or binary.
 *  @param evaluate specifies testing phase of network. If true, it's similar to evaluate() method in Torch.
 *  @returns Net object.
 *
 *  @note Ascii mode of Torch serializer is preferable, because binary mode extensively uses the `long` type of the C language,
 *  which has various bit-lengths on different systems.
 *
 * The loading file must contain a serialized <a href="https://github.com/torch/nn/blob/master/doc/module.md">nn.Module</a> object
 * with importing network. Try to eliminate custom objects from serialized data to avoid importing errors.
 *
 * List of supported layers (i.e. object instances derived from Torch nn.Module class):
 * - nn.Sequential
 * - nn.Parallel
 * - nn.Concat
 * - nn.Linear
 * - nn.SpatialConvolution
 * - nn.SpatialMaxPooling, nn.SpatialAveragePooling
 * - nn.ReLU, nn.TanH, nn.Sigmoid
 * - nn.Reshape
 * - nn.SoftMax, nn.LogSoftMax
 *
 * Also some equivalents of these classes from cunn, cudnn, and fbcunn may be successfully imported.
 */
CV_EXPORTS_W Net readNetFromTorch(const String &model, bool isBinary = true, bool evaluate = true);

/**
 * @brief Read deep learning network represented in one of the supported formats.
 * @param[in] model Binary file contains trained weights. The following file
 *                  extensions are expected for models from different frameworks:
 *                  * `*.caffemodel` (Caffe, http://caffe.berkeleyvision.org/)
 *                  * `*.pb` (TensorFlow, https://www.tensorflow.org/)
 *                  * `*.t7` | `*.net` (Torch, http://torch.ch/)
 *                  * `*.weights` (Darknet, https://pjreddie.com/darknet/)
 *                  * `*.bin` (DLDT, https://software.intel.com/openvino-toolkit)
 *                  * `*.onnx` (ONNX, https://onnx.ai/)
 * @param[in] config Text file contains network configuration. It could be a
 *                   file with the following extensions:
 *                  * `*.prototxt` (Caffe, http://caffe.berkeleyvision.org/)
 *                  * `*.pbtxt` (TensorFlow, https://www.tensorflow.org/)
 *                  * `*.cfg` (Darknet, https://pjreddie.com/darknet/)
 *                  * `*.xml` (DLDT, https://software.intel.com/openvino-toolkit)
 * @param[in] framework Explicit framework name tag to determine a format.
 * @returns Net object.
 *
 * This function automatically detects an origin framework of trained model
 * and calls an appropriate function such as @ref readNetFromCaffe, @ref readNetFromTensorflow,
 * @ref readNetFromTorch or @ref readNetFromDarknet. An order of @p model and @p config
 * arguments does not matter.
 */
CV_EXPORTS_W Net readNet(const String& model, const String& config = "", const String& framework = "");

/**
 * @brief Read deep learning network represented in one of the supported formats.
 * @details This is an overloaded member function, provided for convenience.
 *          It differs from the above function only in what argument(s) it accepts.
 * @param[in] framework    Name of origin framework.
 * @param[in] bufferModel  A buffer with a content of binary file with weights
 * @param[in] bufferConfig A buffer with a content of text file contains network configuration.
 * @returns Net object.
 */
CV_EXPORTS_W Net readNet(const String& framework, const std::vector<uchar>& bufferModel,
                         const std::vector<uchar>& bufferConfig = std::vector<uchar>());

/** @brief Loads blob which was serialized as torch.Tensor object of Torch7 framework.
 *  @warning This function has the same limitations as readNetFromTorch().
 */
CV_EXPORTS_W Mat readTorchBlob(const String &filename, bool isBinary = true);

/** @brief Load a network from Intel's Model Optimizer intermediate representation.
 *  @param[in] xml XML configuration file with network's topology.
 *  @param[in] bin Binary file with trained weights.
 *  @returns Net object.
 *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
 *  backend.
 */
CV_EXPORTS_W
Net readNetFromModelOptimizer(const String &xml, const String &bin);

/** @brief Load a network from Intel's Model Optimizer intermediate representation.
 *  @param[in] bufferModelConfig Buffer contains XML configuration with network's topology.
 *  @param[in] bufferWeights     Buffer contains binary data with trained weights.
 *  @returns Net object.
 *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
 *  backend.
 */
CV_EXPORTS_W
Net readNetFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights);

/** @brief Load a network from Intel's Model Optimizer intermediate representation.
 *  @param[in] bufferModelConfigPtr  Pointer to buffer which contains XML configuration with network's topology.
 *  @param[in] bufferModelConfigSize Binary size of XML configuration data.
 *  @param[in] bufferWeightsPtr      Pointer to buffer which contains binary data with trained weights.
 *  @param[in] bufferWeightsSize     Binary size of trained weights data.
 *  @returns Net object.
 *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
 *  backend.
 */
CV_EXPORTS
Net readNetFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
                              const uchar* bufferWeightsPtr, size_t bufferWeightsSize);

/** @brief Reads a network model <a href="https://onnx.ai/">ONNX</a>.
 *  @param onnxFile path to the .onnx file with text description of the network architecture.
 *  @returns Net object that is ready to do forward; throws an exception in failure cases.
 */
CV_EXPORTS_W Net readNetFromONNX(const String &onnxFile);

/** @brief Reads a network model from <a href="https://onnx.ai/">ONNX</a>
 *         in-memory buffer.
 *  @param buffer     memory address of the first byte of the buffer.
 *  @param sizeBuffer size of the buffer.
 *  @returns Net object that is ready to do forward; throws an exception
 *           in failure cases.
 */
CV_EXPORTS Net readNetFromONNX(const char* buffer, size_t sizeBuffer);

/** @brief Reads a network model from <a href="https://onnx.ai/">ONNX</a>
 *         in-memory buffer.
 *  @param buffer in-memory buffer that stores the ONNX model bytes.
 *  @returns Net object that is ready to do forward; throws an exception
 *           in failure cases.
 */
CV_EXPORTS_W Net readNetFromONNX(const std::vector<uchar>& buffer);

/** @brief Creates blob from .pb file.
 *  @param path path to the .pb file with input tensor.
 *  @returns Mat.
 */
CV_EXPORTS_W Mat readTensorFromONNX(const String& path);

/** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
 *  subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels.
 *  @param image input image (with 1-, 3- or 4-channels).
 *  @param size spatial size for output image
 *  @param mean scalar with mean values which are subtracted from channels. Values are intended
 *  to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
 *  @param scalefactor multiplier for @p image values.
 *  @param swapRB flag which indicates that swap first and last channels
 *  in 3-channel image is necessary.
 *  @param crop flag which indicates whether image will be cropped after resize or not
 *  @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
 *  @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
 *  dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
 *  If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
 *  @returns 4-dimensional Mat with NCHW dimensions order.
 */
CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
                               const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                               int ddepth=CV_32F);

/** @brief Creates 4-dimensional blob from image.
 *  @details This is an overloaded member function, provided for convenience.
 *           It differs from the above function only in what argument(s) it accepts.
 */
CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0,
                              const Size& size = Size(), const Scalar& mean = Scalar(),
                              bool swapRB=false, bool crop=false, int ddepth=CV_32F);


/** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
 *  crops @p images from center, subtract @p mean values, scales values by @p scalefactor,
 *  swap Blue and Red channels.
 *  @param images input images (all with 1-, 3- or 4-channels).
 *  @param size spatial size for output image
 *  @param mean scalar with mean values which are subtracted from channels. Values are intended
 *  to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
 *  @param scalefactor multiplier for @p images values.
 *  @param swapRB flag which indicates that swap first and last channels
 *  in 3-channel image is necessary.
 *  @param crop flag which indicates whether image will be cropped after resize or not
 *  @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
 *  @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
 *  dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
 *  If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
 *  @returns 4-dimensional Mat with NCHW dimensions order.
 */
CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
                                Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                                int ddepth=CV_32F);

/** @brief Creates 4-dimensional blob from series of images.
 *  @details This is an overloaded member function, provided for convenience.
 *           It differs from the above function only in what argument(s) it accepts.
 */
CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob,
                               double scalefactor=1.0, Size size = Size(),
                               const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                               int ddepth=CV_32F);

/** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
 *  (std::vector<cv::Mat>).
 *  @param[in] blob_ 4 dimensional array (images, channels, height, width) in floating point precision (CV_32F) from
 *  which you would like to extract the images.
 *  @param[out] images_ array of 2D Mat containing the images extracted from the blob in floating point precision
 *  (CV_32F). They are non normalized neither mean added. The number of returned images equals the first dimension
 *  of the blob (batch size). Every image has a number of channels equal to the second dimension of the blob (depth).
 */
CV_EXPORTS_W void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_);

/** @brief Convert all weights of Caffe network to half precision floating point.
 *  @param src Path to origin model from Caffe framework containing single
 *             precision floating point weights (usually has `.caffemodel` extension).
 *  @param dst Path to destination model with updated weights.
 *  @param layersTypes Set of layers types which parameters will be converted.
 *         By default, converts only Convolutional and Fully-Connected layers'
 *         weights.
 *
 *  @note The shrunk model has no origin float32 weights so it can't be used
 *        in origin Caffe framework anymore. However the structure of data
 *        is taken from NVidia's Caffe fork: https://github.com/NVIDIA/caffe.
 *        So the resulting model may be used there.
 */
CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst,
                                   const std::vector<String>& layersTypes = std::vector<String>());

/** @brief Create a text representation for a binary network stored in protocol buffer format.
 *  @param[in] model  A path to binary network.
 *  @param[in] output A path to output text file to be created.
 *
 *  @note To reduce output file size, trained weights are not included.
 */
CV_EXPORTS_W void writeTextGraph(const String& model, const String& output);

/** @brief Performs non maximum suppression given boxes and corresponding scores.

 * @param bboxes a set of bounding boxes to apply NMS.
 * @param scores a set of corresponding confidences.
 * @param score_threshold a threshold used to filter boxes by score.
 * @param nms_threshold a threshold used in non maximum suppression.
 * @param indices the kept indices of bboxes after NMS.
 * @param eta a coefficient in adaptive threshold formula: \f$nms\_threshold_{i+1}=eta\cdot nms\_threshold_i\f$.
 * @param top_k if `>0`, keep at most @p top_k picked indices.
 */
CV_EXPORTS void NMSBoxes(const std::vector<Rect>& bboxes, const std::vector<float>& scores,
                         const float score_threshold, const float nms_threshold,
                         CV_OUT std::vector<int>& indices,
                         const float eta = 1.f, const int top_k = 0);

CV_EXPORTS_W void NMSBoxes(const std::vector<Rect2d>& bboxes, const std::vector<float>& scores,
                           const float score_threshold, const float nms_threshold,
                           CV_OUT std::vector<int>& indices,
                           const float eta = 1.f, const int top_k = 0);

CV_EXPORTS_AS(NMSBoxesRotated) void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
                                             const float score_threshold, const float nms_threshold,
                                             CV_OUT std::vector<int>& indices,
                                             const float eta = 1.f, const int top_k = 0);


/** @brief This class is presented high-level API for neural networks.
 *
 * Model allows to set params for preprocessing input image.
 * Model creates net from file with trained weights and config,
 * sets preprocessing input and runs forward pass.
 */
class CV_EXPORTS_W_SIMPLE Model
{
public:
    CV_DEPRECATED_EXTERNAL  // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    Model();

    Model(const Model&) = default;
    Model(Model&&) = default;
    Model& operator=(const Model&) = default;
    Model& operator=(Model&&) = default;

    /**
     * @brief Create model from deep learning network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP Model(const String& model, const String& config = "");

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP Model(const Net& network);

    /** @brief Set input size for frame.
     *  @param[in] size New input size.
     *  @note If the shape of the new blob is less than 0, then the frame size does not change.
     */
    CV_WRAP Model& setInputSize(const Size& size);

    /** @overload
     *  @param[in] width New input width.
     *  @param[in] height New input height.
     */
    CV_WRAP inline
    Model& setInputSize(int width, int height) { return setInputSize(Size(width, height)); }

    /** @brief Set mean value for frame.
     *  @param[in] mean Scalar with mean values which are subtracted from channels.
     */
    CV_WRAP Model& setInputMean(const Scalar& mean);

    /** @brief Set scalefactor value for frame.
     *  @param[in] scale Multiplier for frame values.
     */
    CV_WRAP Model& setInputScale(double scale);

    /** @brief Set flag crop for frame.
     *  @param[in] crop Flag which indicates whether image will be cropped after resize or not.
     */
    CV_WRAP Model& setInputCrop(bool crop);

    /** @brief Set flag swapRB for frame.
     *  @param[in] swapRB Flag which indicates that swap first and last channels.
     */
    CV_WRAP Model& setInputSwapRB(bool swapRB);

    /** @brief Set preprocessing parameters for frame.
     *  @param[in] size New input size.
     *  @param[in] mean Scalar with mean values which are subtracted from channels.
     *  @param[in] scale Multiplier for frame values.
     *  @param[in] swapRB Flag which indicates that swap first and last channels.
     *  @param[in] crop Flag which indicates whether image will be cropped after resize or not.
     *  blob(n, c, y, x) = scale * resize( frame(y, x, c) ) - mean(c)
     */
    CV_WRAP void setInputParams(double scale = 1.0, const Size& size = Size(),
                                const Scalar& mean = Scalar(), bool swapRB = false, bool crop = false);

    /** @brief Given the @p input frame, create input blob, run net and return the output @p blobs.
     *  @param[in]  frame The input image.
     *  @param[out] outs Allocated output blobs, which will store results of the computation.
     */
    CV_WRAP void predict(InputArray frame, OutputArrayOfArrays outs) const;


    // ============================== Net proxy methods ==============================
    // Never expose methods with network implementation details, like:
    // - addLayer, addLayerToPrev, connect, setInputsNames, setInputShape, setParam, getParam
    // - getLayer*, getUnconnectedOutLayers, getUnconnectedOutLayersNames, getLayersShapes
    // - forward* methods, setInput

    /// @sa Net::setPreferableBackend
    CV_WRAP Model& setPreferableBackend(dnn::Backend backendId);
    /// @sa Net::setPreferableTarget
    CV_WRAP Model& setPreferableTarget(dnn::Target targetId);

    CV_DEPRECATED_EXTERNAL
    operator Net&() const { return getNetwork_(); }

//protected: - internal/tests usage only
    Net& getNetwork_() const;
    inline Net& getNetwork_() { return const_cast<const Model*>(this)->getNetwork_(); }

    struct Impl;
    inline Impl* getImpl() const { return impl.get(); }
    inline Impl& getImplRef() const { CV_DbgAssert(impl); return *impl.get(); }
protected:
    Ptr<Impl> impl;
};

/** @brief This class represents high-level API for classification models.
 *
 * ClassificationModel allows to set params for preprocessing input image.
 * ClassificationModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and return top-1 prediction.
 */
class CV_EXPORTS_W_SIMPLE ClassificationModel : public Model
{
public:
    /**
     * @brief Create classification model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP ClassificationModel(const String& model, const String& config = "");

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP ClassificationModel(const Net& network);

    /** @brief Given the @p input frame, create input blob, run net and return top-1 prediction.
     *  @param[in] frame The input image.
     */
    std::pair<int, float> classify(InputArray frame);

    /** @overload */
    CV_WRAP void classify(InputArray frame, CV_OUT int& classId, CV_OUT float& conf);
};

/** @brief This class represents high-level API for keypoints models
 *
 * KeypointsModel allows to set params for preprocessing input image.
 * KeypointsModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and returns the x and y coordinates of each detected keypoint
 */
class CV_EXPORTS_W_SIMPLE KeypointsModel: public Model
{
public:
    /**
     * @brief Create keypoints model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP KeypointsModel(const String& model, const String& config = "");

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP KeypointsModel(const Net& network);

    /** @brief Given the @p input frame, create input blob, run net
     *  @param[in] frame The input image.
     *  @param thresh minimum confidence threshold to select a keypoint
     *  @returns a vector holding the x and y coordinates of each detected keypoint
     *
     */
    CV_WRAP std::vector<Point2f> estimate(InputArray frame, float thresh=0.5);
};

/** @brief This class represents high-level API for segmentation models
 *
 * SegmentationModel allows to set params for preprocessing input image.
 * SegmentationModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and returns the class prediction for each pixel.
 */
class CV_EXPORTS_W_SIMPLE SegmentationModel: public Model
{
public:
    /**
     * @brief Create segmentation model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP SegmentationModel(const String& model, const String& config = "");

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP SegmentationModel(const Net& network);

    /** @brief Given the @p input frame, create input blob, run net
     *  @param[in]  frame The input image.
     *  @param[out] mask Allocated class prediction for each pixel
     */
    CV_WRAP void segment(InputArray frame, OutputArray mask);
};

/** @brief This class represents high-level API for object detection networks.
 *
 * DetectionModel allows to set params for preprocessing input image.
 * DetectionModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and return result detections.
 * For DetectionModel SSD, Faster R-CNN, YOLO topologies are supported.
 */
class CV_EXPORTS_W_SIMPLE DetectionModel : public Model
{
public:
    /**
     * @brief Create detection model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP DetectionModel(const String& model, const String& config = "");

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP DetectionModel(const Net& network);

    CV_DEPRECATED_EXTERNAL  // avoid using in C++ code (need to fix bindings first)
    DetectionModel();

    /**
     * @brief nmsAcrossClasses defaults to false,
     * such that when non max suppression is used during the detect() function, it will do so per-class.
     * This function allows you to toggle this behaviour.
     * @param[in] value The new value for nmsAcrossClasses
     */
    CV_WRAP DetectionModel& setNmsAcrossClasses(bool value);

    /**
     * @brief Getter for nmsAcrossClasses. This variable defaults to false,
     * such that when non max suppression is used during the detect() function, it will do so only per-class
     */
    CV_WRAP bool getNmsAcrossClasses();

    /** @brief Given the @p input frame, create input blob, run net and return result detections.
     *  @param[in]  frame The input image.
     *  @param[out] classIds Class indexes in result detection.
     *  @param[out] confidences A set of corresponding confidences.
     *  @param[out] boxes A set of bounding boxes.
     *  @param[in] confThreshold A threshold used to filter boxes by confidences.
     *  @param[in] nmsThreshold A threshold used in non maximum suppression.
     */
    CV_WRAP void detect(InputArray frame, CV_OUT std::vector<int>& classIds,
                        CV_OUT std::vector<float>& confidences, CV_OUT std::vector<Rect>& boxes,
                        float confThreshold = 0.5f, float nmsThreshold = 0.0f);
};


/** @brief This class represents high-level API for text recognition networks.
 *
 * TextRecognitionModel allows to set params for preprocessing input image.
 * TextRecognitionModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and return recognition result.
 * For TextRecognitionModel, CRNN-CTC is supported.
 */
class CV_EXPORTS_W_SIMPLE TextRecognitionModel : public Model
{
public:
    CV_DEPRECATED_EXTERNAL  // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    TextRecognitionModel();

    /**
     * @brief Create Text Recognition model from deep learning network
     * Call setDecodeType() and setVocabulary() after constructor to initialize the decoding method
     * @param[in] network Net object
     */
    CV_WRAP TextRecognitionModel(const Net& network);

    /**
     * @brief Create text recognition model from network represented in one of the supported formats
     * Call setDecodeType() and setVocabulary() after constructor to initialize the decoding method
     * @param[in] model Binary file contains trained weights
     * @param[in] config Text file contains network configuration
     */
    CV_WRAP inline
    TextRecognitionModel(const std::string& model, const std::string& config = "")
        : TextRecognitionModel(readNet(model, config)) { /* nothing */ }

    /**
     * @brief Set the decoding method of translating the network output into string
     * @param[in] decodeType The decoding method of translating the network output into string: {'CTC-greedy': greedy decoding for the output of CTC-based methods}
     */
    CV_WRAP
    TextRecognitionModel& setDecodeType(const std::string& decodeType);

    /**
     * @brief Get the decoding method
     * @return the decoding method
     */
    CV_WRAP
    const std::string& getDecodeType() const;

    /**
     * @brief Set the vocabulary for recognition.
     * @param[in] vocabulary the associated vocabulary of the network.
     */
    CV_WRAP
    TextRecognitionModel& setVocabulary(const std::vector<std::string>& vocabulary);

    /**
     * @brief Get the vocabulary for recognition.
     * @return vocabulary the associated vocabulary
     */
    CV_WRAP
    const std::vector<std::string>& getVocabulary() const;

    /**
     * @brief Given the @p input frame, create input blob, run net and return recognition result
     * @param[in] frame The input image
     * @return The text recognition result
     */
    CV_WRAP
    std::string recognize(InputArray frame) const;

    /**
     * @brief Given the @p input frame, create input blob, run net and return recognition result
     * @param[in] frame The input image
     * @param[in] roiRects List of text detection regions of interest (cv::Rect, CV_32SC4). ROIs are cropped as the network inputs
     * @param[out] results A set of text recognition results.
     */
    CV_WRAP
    void recognize(InputArray frame, InputArrayOfArrays roiRects, CV_OUT std::vector<std::string>& results) const;
};


/** @brief Base class for text detection networks
 */
class CV_EXPORTS_W_SIMPLE TextDetectionModel : public Model
{
protected:
    CV_DEPRECATED_EXTERNAL  // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    TextDetectionModel();

public:

    /** @brief Performs detection
     *
     * Given the input @p frame, prepare network input, run network inference, post-process network output and return result detections.
     *
     * Each result is quadrangle's 4 points in this order:
     * - bottom-left
     * - top-left
     * - top-right
     * - bottom-right
     *
     * Use cv::getPerspectiveTransform function to retrieve image region without perspective transformations.
     *
     * @note If DL model doesn't support that kind of output then result may be derived from detectTextRectangles() output.
     *
     * @param[in] frame The input image
     * @param[out] detections array with detections' quadrangles (4 points per result)
     * @param[out] confidences array with detection confidences
     */
    CV_WRAP
    void detect(
            InputArray frame,
            CV_OUT std::vector< std::vector<Point> >& detections,
            CV_OUT std::vector<float>& confidences
    ) const;

    /** @overload */
    CV_WRAP
    void detect(
            InputArray frame,
            CV_OUT std::vector< std::vector<Point> >& detections
    ) const;

    /** @brief Performs detection
     *
     * Given the input @p frame, prepare network input, run network inference, post-process network output and return result detections.
     *
     * Each result is rotated rectangle.
     *
     * @note Result may be inaccurate in case of strong perspective transformations.
     *
     * @param[in] frame the input image
     * @param[out] detections array with detections' RotationRect results
     * @param[out] confidences array with detection confidences
     */
    CV_WRAP
    void detectTextRectangles(
            InputArray frame,
            CV_OUT std::vector<cv::RotatedRect>& detections,
            CV_OUT std::vector<float>& confidences
    ) const;

    /** @overload */
    CV_WRAP
    void detectTextRectangles(
            InputArray frame,
            CV_OUT std::vector<cv::RotatedRect>& detections
    ) const;
};

/** @brief This class represents high-level API for text detection DL networks compatible with EAST model.
 *
 * Configurable parameters:
 * - (float) confThreshold - used to filter boxes by confidences, default: 0.5f
 * - (float) nmsThreshold - used in non maximum suppression, default: 0.0f
 */
class CV_EXPORTS_W_SIMPLE TextDetectionModel_EAST : public TextDetectionModel
{
public:
    CV_DEPRECATED_EXTERNAL  // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    TextDetectionModel_EAST();

    /**
     * @brief Create text detection algorithm from deep learning network
     * @param[in] network Net object
     */
    CV_WRAP TextDetectionModel_EAST(const Net& network);

    /**
     * @brief Create text detection model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
1513 */ 1514 CV_WRAP inline TextDetectionModel_EAST(const std::string & model,const std::string & config="")1515 TextDetectionModel_EAST(const std::string& model, const std::string& config = "") 1516 : TextDetectionModel_EAST(readNet(model, config)) { /* nothing */ } 1517 1518 /** 1519 * @brief Set the detection confidence threshold 1520 * @param[in] confThreshold A threshold used to filter boxes by confidences 1521 */ 1522 CV_WRAP 1523 TextDetectionModel_EAST& setConfidenceThreshold(float confThreshold); 1524 1525 /** 1526 * @brief Get the detection confidence threshold 1527 */ 1528 CV_WRAP 1529 float getConfidenceThreshold() const; 1530 1531 /** 1532 * @brief Set the detection NMS filter threshold 1533 * @param[in] nmsThreshold A threshold used in non maximum suppression 1534 */ 1535 CV_WRAP 1536 TextDetectionModel_EAST& setNMSThreshold(float nmsThreshold); 1537 1538 /** 1539 * @brief Get the detection confidence threshold 1540 */ 1541 CV_WRAP 1542 float getNMSThreshold() const; 1543 }; 1544 1545 /** @brief This class represents high-level API for text detection DL networks compatible with DB model. 1546 * 1547 * Related publications: @cite liao2020real 1548 * Paper: https://arxiv.org/abs/1911.08947 1549 * For more information about the hyper-parameters setting, please refer to https://github.com/MhLiao/DB 1550 * 1551 * Configurable parameters: 1552 * - (float) binaryThreshold - The threshold of the binary map. It is usually set to 0.3. 1553 * - (float) polygonThreshold - The threshold of text polygons. It is usually set to 0.5, 0.6, and 0.7. Default is 0.5f 1554 * - (double) unclipRatio - The unclip ratio of the detected text region, which determines the output size. It is usually set to 2.0. 1555 * - (int) maxCandidates - The max number of the output results. 
1556 */ 1557 class CV_EXPORTS_W_SIMPLE TextDetectionModel_DB : public TextDetectionModel 1558 { 1559 public: 1560 CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first) 1561 TextDetectionModel_DB(); 1562 1563 /** 1564 * @brief Create text detection algorithm from deep learning network. 1565 * @param[in] network Net object. 1566 */ 1567 CV_WRAP TextDetectionModel_DB(const Net& network); 1568 1569 /** 1570 * @brief Create text detection model from network represented in one of the supported formats. 1571 * An order of @p model and @p config arguments does not matter. 1572 * @param[in] model Binary file contains trained weights. 1573 * @param[in] config Text file contains network configuration. 1574 */ 1575 CV_WRAP inline TextDetectionModel_DB(const std::string & model,const std::string & config="")1576 TextDetectionModel_DB(const std::string& model, const std::string& config = "") 1577 : TextDetectionModel_DB(readNet(model, config)) { /* nothing */ } 1578 1579 CV_WRAP TextDetectionModel_DB& setBinaryThreshold(float binaryThreshold); 1580 CV_WRAP float getBinaryThreshold() const; 1581 1582 CV_WRAP TextDetectionModel_DB& setPolygonThreshold(float polygonThreshold); 1583 CV_WRAP float getPolygonThreshold() const; 1584 1585 CV_WRAP TextDetectionModel_DB& setUnclipRatio(double unclipRatio); 1586 CV_WRAP double getUnclipRatio() const; 1587 1588 CV_WRAP TextDetectionModel_DB& setMaxCandidates(int maxCandidates); 1589 CV_WRAP int getMaxCandidates() const; 1590 }; 1591 1592 //! @} 1593 CV__DNN_INLINE_NS_END 1594 } 1595 } 1596 1597 #include <opencv2/dnn/layer.hpp> 1598 #include <opencv2/dnn/dnn.inl.hpp> 1599 1600 /// @deprecated Include this header directly from application. Automatic inclusion will be removed 1601 #include <opencv2/dnn/utils/inference_engine.hpp> 1602 1603 #endif /* OPENCV_DNN_DNN_HPP */ 1604